diff --git a/.clang-format b/.clang-format index 52e01497187..7746106fcb5 100644 --- a/.clang-format +++ b/.clang-format @@ -89,7 +89,7 @@ PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 -RemoveBracesLLVM: true +RemoveBracesLLVM: false SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements diff --git a/.clang-tidy b/.clang-tidy index dc1cebe9430..896052915f7 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -22,6 +22,7 @@ Checks: [ '-bugprone-exception-escape', '-bugprone-forward-declaration-namespace', '-bugprone-implicit-widening-of-multiplication-result', + '-bugprone-multi-level-implicit-pointer-conversion', '-bugprone-narrowing-conversions', '-bugprone-not-null-terminated-result', '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged @@ -36,6 +37,7 @@ Checks: [ '-cert-oop54-cpp', '-cert-oop57-cpp', + '-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667 '-clang-analyzer-optin.performance.Padding', '-clang-analyzer-unix.Malloc', @@ -93,11 +95,13 @@ Checks: [ '-modernize-pass-by-value', '-modernize-return-braced-init-list', '-modernize-use-auto', + '-modernize-use-constraints', # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872 '-modernize-use-default-member-init', '-modernize-use-emplace', '-modernize-use-nodiscard', '-modernize-use-trailing-return-type', + '-performance-enum-size', '-performance-inefficient-string-concatenation', '-performance-no-int-to-ptr', '-performance-avoid-endl', @@ -105,6 +109,7 @@ Checks: [ '-portability-simd-intrinsics', + '-readability-avoid-nested-conditional-operator', '-readability-avoid-unconditional-preprocessor-if', '-readability-braces-around-statements', '-readability-convert-member-functions-to-static', @@ -118,6 +123,13 @@ Checks: [ '-readability-magic-numbers', '-readability-named-parameter', '-readability-redundant-declaration', + '-readability-redundant-inline-specifier', # useful but incompatible with __attribute((always_inline))__ (aka. ALWAYS_INLINE, base/base/defines.h). + # ALWAYS_INLINE only has an effect if combined with `inline`: https://godbolt.org/z/Eefd74qdM + '-readability-redundant-member-init', # Useful but triggers another problem. Imagine a struct S with multiple String members. Structs are often instantiated via designated + # initializer S s{.s1 = [...], .s2 = [...], [...]}. In this case, compiler warning `missing-field-initializers` requires to specify all members which are not in-struct + # initialized (example: s1 in struct S { String s1; String s2{};}; is not in-struct initialized, therefore it must be specified at instantiation time). As explicitly + # specifying all members is tedious for large structs, `missing-field-initializers` makes programmers initialize as many members as possible in-struct. Clang-tidy + # warning `readability-redundant-member-init` does the opposite thing, both are not compatible with each other. 
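A minimal C++ sketch of the two situations described in the comments above — the `inline` + `ALWAYS_INLINE` pairing and the designated-initializer conflict. All names below are hypothetical illustrations rather than code from the ClickHouse sources, and the snippet assumes C++20 designated initializers:

```cpp
#include <string>

// Hypothetical stand-in for ALWAYS_INLINE (base/base/defines.h): the attribute only has an
// effect when the function is also declared `inline`, so readability-redundant-inline-specifier
// would flag the `inline` keyword here even though it is required.
#define ALWAYS_INLINE __attribute__((__always_inline__))

ALWAYS_INLINE inline int twice(int x) { return 2 * x; }

// Why readability-redundant-member-init clashes with -Wmissing-field-initializers:
struct S
{
    std::string s1;    // no in-struct init: every designated initializer must mention it
    std::string s2{};  // in-struct init lets callers omit it without -Wmissing-field-initializers,
                       // but readability-redundant-member-init would ask to remove the `{}`
};

S make_s()
{
    return S{.s1 = "example"};  // s2 may be omitted only because of its in-struct initializer
}

int main()
{
    return twice(static_cast<int>(make_s().s1.size())) == 14 ? 0 : 1;
}
```

Removing the `{}` from `s2` would satisfy the clang-tidy check but re-trigger the compiler warning at every instantiation that omits `s2`, which is the incompatibility the comment refers to.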
'-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', diff --git a/.github/ISSUE_TEMPLATE/10_question.md b/.github/ISSUE_TEMPLATE/10_question.md index 0992bf06217..08a05a844e0 100644 --- a/.github/ISSUE_TEMPLATE/10_question.md +++ b/.github/ISSUE_TEMPLATE/10_question.md @@ -10,3 +10,11 @@ assignees: '' > Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse > If you still prefer GitHub issues, remove all this text and ask your question here. + +**Company or project name** + +Put your company name or project description here + +**Question** + +Your question diff --git a/.github/ISSUE_TEMPLATE/20_feature-request.md b/.github/ISSUE_TEMPLATE/20_feature-request.md index f59dbc2c40f..cf5ac000a23 100644 --- a/.github/ISSUE_TEMPLATE/20_feature-request.md +++ b/.github/ISSUE_TEMPLATE/20_feature-request.md @@ -9,6 +9,10 @@ assignees: '' > (you don't have to strictly follow this form) +**Company or project name** + +> Put your company name or project description here + **Use case** > A clear and concise description of what is the intended usage scenario is. diff --git a/.github/ISSUE_TEMPLATE/30_unexpected-behaviour.md b/.github/ISSUE_TEMPLATE/30_unexpected-behaviour.md index 3630d95ba33..73c861886e6 100644 --- a/.github/ISSUE_TEMPLATE/30_unexpected-behaviour.md +++ b/.github/ISSUE_TEMPLATE/30_unexpected-behaviour.md @@ -9,6 +9,10 @@ assignees: '' (you don't have to strictly follow this form) +**Company or project name** + +Put your company name or project description here + **Describe the unexpected behaviour** A clear and concise description of what works not as it is supposed to. diff --git a/.github/ISSUE_TEMPLATE/35_incomplete_implementation.md b/.github/ISSUE_TEMPLATE/35_incomplete_implementation.md index 6a014ce3c29..45f752b53ef 100644 --- a/.github/ISSUE_TEMPLATE/35_incomplete_implementation.md +++ b/.github/ISSUE_TEMPLATE/35_incomplete_implementation.md @@ -9,6 +9,10 @@ assignees: '' (you don't have to strictly follow this form) +**Company or project name** + +Put your company name or project description here + **Describe the unexpected behaviour** A clear and concise description of what works not as it is supposed to. diff --git a/.github/ISSUE_TEMPLATE/45_usability-issue.md b/.github/ISSUE_TEMPLATE/45_usability-issue.md index b03b11606c1..79f23fe0a14 100644 --- a/.github/ISSUE_TEMPLATE/45_usability-issue.md +++ b/.github/ISSUE_TEMPLATE/45_usability-issue.md @@ -9,6 +9,9 @@ assignees: '' (you don't have to strictly follow this form) +**Company or project name** +Put your company name or project description here + **Describe the issue** A clear and concise description of what works not as it is supposed to. diff --git a/.github/ISSUE_TEMPLATE/50_build-issue.md b/.github/ISSUE_TEMPLATE/50_build-issue.md index 9b05fbbdd13..5a58add9ad8 100644 --- a/.github/ISSUE_TEMPLATE/50_build-issue.md +++ b/.github/ISSUE_TEMPLATE/50_build-issue.md @@ -9,6 +9,10 @@ assignees: '' > Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. 
Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/ +**Company or project name** + +> Put your company name or project description here + **Operating system** > OS kind or distribution, specific version/release, non-standard kernel if any. If you are trying to build inside virtual machine, please mention it too. diff --git a/.github/ISSUE_TEMPLATE/60_documentation-issue.md b/.github/ISSUE_TEMPLATE/60_documentation-issue.md index 557e5ea43c9..5a941977dac 100644 --- a/.github/ISSUE_TEMPLATE/60_documentation-issue.md +++ b/.github/ISSUE_TEMPLATE/60_documentation-issue.md @@ -8,6 +8,9 @@ labels: comp-documentation (you don't have to strictly follow this form) +**Company or project name** +Put your company name or project description here + **Describe the issue** A clear and concise description of what's wrong in documentation. diff --git a/.github/ISSUE_TEMPLATE/70_performance-issue.md b/.github/ISSUE_TEMPLATE/70_performance-issue.md index d0e549039a6..21eba3f5af1 100644 --- a/.github/ISSUE_TEMPLATE/70_performance-issue.md +++ b/.github/ISSUE_TEMPLATE/70_performance-issue.md @@ -9,6 +9,9 @@ assignees: '' (you don't have to strictly follow this form) +**Company or project name** +Put your company name or project description here + **Describe the situation** What exactly works slower than expected? diff --git a/.github/ISSUE_TEMPLATE/80_backward-compatibility.md b/.github/ISSUE_TEMPLATE/80_backward-compatibility.md index a13e9508f70..8058f5bcc53 100644 --- a/.github/ISSUE_TEMPLATE/80_backward-compatibility.md +++ b/.github/ISSUE_TEMPLATE/80_backward-compatibility.md @@ -9,6 +9,9 @@ assignees: '' (you don't have to strictly follow this form) +**Company or project name** +Put your company name or project description here + **Describe the issue** A clear and concise description of what works not as it is supposed to. diff --git a/.github/ISSUE_TEMPLATE/85_bug-report.md b/.github/ISSUE_TEMPLATE/85_bug-report.md index 6bf265260ac..c43473d63ad 100644 --- a/.github/ISSUE_TEMPLATE/85_bug-report.md +++ b/.github/ISSUE_TEMPLATE/85_bug-report.md @@ -11,6 +11,10 @@ assignees: '' > You have to provide the following information whenever possible. +**Company or project name** + +> Put your company name or project description here + **Describe what's wrong** > A clear and concise description of what works not as it is supposed to. 
diff --git a/.github/ISSUE_TEMPLATE/96_installation-issues.md b/.github/ISSUE_TEMPLATE/96_installation-issues.md index e4be8af86b6..5f1b6cfd640 100644 --- a/.github/ISSUE_TEMPLATE/96_installation-issues.md +++ b/.github/ISSUE_TEMPLATE/96_installation-issues.md @@ -7,6 +7,10 @@ assignees: '' --- +**Company or project name** + +Put your company name or project description here + **I have tried the following solutions**: https://clickhouse.com/docs/en/faq/troubleshooting/#troubleshooting-installation-errors **Installation type** diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 950e672272a..51a1a6e2df8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,6 +11,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py - Backward Incompatible Change - Build/Testing/Packaging Improvement - Documentation (changelog entry is not required) +- Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) - Bug Fix (user-visible misbehavior in an official stable release) - CI Fix or Improvement (changelog entry is not required) - Not for changelog (changelog entry is not required) @@ -41,48 +42,27 @@ At a minimum, the following information should be added (but add more as needed) > Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/ -
- Modify your CI run - -**NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing -**NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step - -#### Include tests (required builds will be added automatically): -- [ ] Fast test -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Unit tests -- [ ] Performance tests -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] All with Analyzer -- [ ] All with Azure -- [ ] Add your option here - -#### Exclude tests: -- [ ] Fast test -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Performance tests -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] All with MSAN -- [ ] All with UBSAN -- [ ] All with Coverage -- [ ] All with Aarch64 -- [ ] Add your option here - -#### Extra options: -- [ ] do not test (only style check) -- [ ] disable merge-commit (no merge from master before tests) -- [ ] disable CI cache (job reuse) - -#### Only specified batches in multi-batch jobs: -- [ ] 1 -- [ ] 2 -- [ ] 3 -- [ ] 4 - -
+#### CI Settings (Only check the boxes if you know what you are doing): +- [ ] Allow: All Required Checks +- [ ] Allow: Stateless tests +- [ ] Allow: Stateful tests +- [ ] Allow: Integration Tests +- [ ] Allow: Performance tests +- [ ] Allow: All NOT Required Checks +- [ ] Allow: batch 1, 2 for multi-batch jobs +- [ ] Allow: batch 3, 4, 5, 6 for multi-batch jobs +--- +- [ ] Exclude: Style check +- [ ] Exclude: Fast test +- [ ] Exclude: Integration Tests +- [ ] Exclude: Stateless tests +- [ ] Exclude: Stateful tests +- [ ] Exclude: Performance tests +- [ ] Exclude: All with ASAN +- [ ] Exclude: All with Aarch64 +- [ ] Exclude: All with TSAN, MSAN, UBSAN, Coverage +--- +- [ ] Do not test +- [ ] Upload binaries for special builds +- [ ] Disable merge-commit +- [ ] Disable CI cache diff --git a/.github/actions/common_setup/action.yml b/.github/actions/common_setup/action.yml index e492fa97816..b9299c64e72 100644 --- a/.github/actions/common_setup/action.yml +++ b/.github/actions/common_setup/action.yml @@ -28,3 +28,10 @@ runs: run: | # to remove every leftovers sudo rm -fr "$TEMP_PATH" && mkdir -p "$TEMP_PATH" + - name: Tune vm.mmap_rnd_bits for sanitizers + shell: bash + run: | + sudo sysctl vm.mmap_rnd_bits + # https://github.com/google/sanitizers/issues/856 + echo "Tune vm.mmap_rnd_bits for sanitizers" + sudo sysctl vm.mmap_rnd_bits=28 diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 2a98722414b..b0380b939bb 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -9,6 +9,12 @@ on: # yamllint disable-line rule:truthy push: branches: - 'backport/**' + +# Cancel the previous wf run in PRs. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: RunConfig: runs-on: [self-hosted, style-checker-aarch64] diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml deleted file mode 100644 index 3c2be767ad2..00000000000 --- a/.github/workflows/cancel.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Cancel - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -on: # yamllint disable-line rule:truthy - workflow_run: - workflows: ["PullRequestCI", "ReleaseBranchCI", "DocsCheck", "BackportPR"] - types: - - requested -jobs: - cancel: - runs-on: [self-hosted, style-checker] - steps: - - uses: styfle/cancel-workflow-action@0.9.1 - with: - all_but_latest: true - workflow_id: ${{ github.event.workflow.id }} diff --git a/.github/workflows/debug.yml b/.github/workflows/debug.yml deleted file mode 100644 index 5abed268ecd..00000000000 --- a/.github/workflows/debug.yml +++ /dev/null @@ -1,11 +0,0 @@ -# The CI for each commit, prints envs and content of GITHUB_EVENT_PATH -name: Debug - -'on': - [push, pull_request, pull_request_review, release, workflow_dispatch, workflow_call] - -jobs: - DebugInfo: - runs-on: ubuntu-latest - steps: - - uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 125096209df..c2a893a8e99 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -27,15 +27,16 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 sync_pr.py --merge || : - - name: Python unit tests - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - echo "Testing the main ci directory" - python3 -m unittest discover -s . 
-p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done +# Runs in MQ: +# - name: Python unit tests +# run: | +# cd "$GITHUB_WORKSPACE/tests/ci" +# echo "Testing the main ci directory" +# python3 -m unittest discover -s . -p 'test_*.py' +# for dir in *_lambda/; do +# echo "Testing $dir" +# python3 -m unittest discover -s "$dir" -p 'test_*.py' +# done - name: PrepareRunConfig id: runconfig run: | @@ -53,13 +54,13 @@ jobs: - name: Re-create GH statuses for skipped jobs if any run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses - BuildDockers: - needs: [RunConfig] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml - with: - data: ${{ needs.RunConfig.outputs.data }} - # Tested in MQ +# Runs in MQ: +# BuildDockers: +# needs: [RunConfig] +# if: ${{ !failure() && !cancelled() }} +# uses: ./.github/workflows/reusable_docker.yml +# with: +# data: ${{ needs.RunConfig.outputs.data }} # StyleCheck: # needs: [RunConfig, BuildDockers] # if: ${{ !failure() && !cancelled() }} @@ -70,253 +71,73 @@ jobs: # data: ${{ needs.RunConfig.outputs.data }} # run_command: | # python3 style_check.py --no-push - CompatibilityCheckX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + + ################################# Main stages ################################# + # for main CI chain + # + Builds_1: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab + uses: ./.github/workflows/reusable_build_stage.yml with: - test_name: Compatibility check (amd64) - runner_type: style-checker + stage: Builds_1 data: ${{ needs.RunConfig.outputs.data }} - CompatibilityCheckAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + Tests_1: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - test_name: Compatibility check (aarch64) - runner_type: style-checker + stage: Tests_1 data: ${{ needs.RunConfig.outputs.data }} -######################################################################################### -#################################### ORDINARY BUILDS #################################### -######################################################################################### -# TODO: never skip builds! 
- BuilderDebRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Builds_2: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} + uses: ./.github/workflows/reusable_build_stage.yml with: - build_name: package_release - checkout_depth: 0 + stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebReleaseCoverage: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Tests_2: + needs: [RunConfig, Builds_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_release_coverage - checkout_depth: 0 + stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAarch64: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + # stage for jobs that do not prohibit merge + Tests_3: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_aarch64 - checkout_depth: 0 + stage: Tests_3 data: ${{ needs.RunConfig.outputs.data }} - BuilderBinRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_release - checkout_depth: 0 # otherwise we will have no info about contributors - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_asan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebUBsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_ubsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebTsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_tsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebMsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_msan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebDebug: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_debug - data: ${{ needs.RunConfig.outputs.data }} -########################################################################################## -##################################### SPECIAL BUILDS ##################################### -########################################################################################## - BuilderBinClangTidy: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_tidy - data: ${{ needs.RunConfig.outputs.data }} - BuilderBinDarwin: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - 
with: - build_name: binary_darwin - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinFreeBSD: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_freebsd - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinDarwinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_darwin_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinPPC64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_ppc64le - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Musl: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_musl - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64V80Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64_v80compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinRISCV64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_riscv64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinS390X: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_s390x - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 -############################################################################################ -##################################### Docker images ####################################### -############################################################################################ - DockerServerImage: - needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker server image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - DockerKeeperImage: - needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker keeper image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################ -##################################### BUILD REPORTER ####################################### -############################################################################################ - BuilderReport: + + 
################################# Reports ################################# + # Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2) + Builds_1_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebDebug - - BuilderDebMsan - - BuilderDebRelease - - BuilderDebTsan - - BuilderDebUBsan + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + needs: [RunConfig, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: + Builds_2_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinAarch64 - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderBinFreeBSD - - BuilderBinPPC64 - - BuilderBinRISCV64 - - BuilderBinS390X - - BuilderBinAmd64Compat - - BuilderBinAarch64V80Compat - - BuilderBinClangTidy - - BuilderBinAmd64Musl - - BuilderDebReleaseCoverage - - BuilderBinRelease + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} + needs: [RunConfig, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse special build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} + MarkReleaseReady: if: ${{ !failure() && !cancelled() }} - needs: - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderDebRelease - - BuilderDebAarch64 - runs-on: [self-hosted, style-checker] + needs: [RunConfig, Builds_1, Builds_2] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Debug run: | @@ -329,7 +150,7 @@ jobs: no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} EOF - name: Not ready - # fail the job to be able restart it + # fail the job to be able to restart it if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} run: exit 1 - name: Check out repository code @@ -340,544 +161,14 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 mark_release_ready.py -############################################################################################ -#################################### INSTALL PACKAGES ###################################### -############################################################################################ - InstallPackagesTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (amd64) - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 install_check.py "$CHECK_NAME" - InstallPackagesTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (arm64) - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 install_check.py "$CHECK_NAME" -############################################################################################## -########################### FUNCTIONAl STATELESS TESTS 
####################################### -############################################################################################## - FunctionalStatelessTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestReleaseAnalyzerS3Replicated: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Debug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Tsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsanAzure: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (azure, asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -############################ FUNCTIONAl STATEFUL TESTS ####################################### -############################################################################################## - 
FunctionalStatefulTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - # Parallel replicas - FunctionalStatefulTestDebugParallelReplicas: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsanParallelReplicas: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsanParallelReplicas: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsanParallelReplicas: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (tsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsanParallelReplicas: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestReleaseParallelReplicas: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && 
!cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -########################### ClickBench ####################################################### -############################################################################################## - ClickBenchAMD64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (amd64) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" - ClickBenchAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" -############################################################################################## -######################################### STRESS TESTS ####################################### -############################################################################################## - StressTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsanAzure: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (azure, tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (msan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (ubsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (debug) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -############################# INTEGRATION TESTS ############################################# -############################################################################################# - IntegrationTestsAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsAnalyzerAsan: - needs: [RunConfig, 
BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan, old analyzer) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (release) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### AST FUZZERS ############################################ -############################################################################################## - ASTFuzzerTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestUBSan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestMSan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### UNIT TESTS ############################################# -############################################################################################# - UnitTestsAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsReleaseClang: - needs: [RunConfig, BuilderBinRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: 
./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### PERFORMANCE TESTS ###################################### -############################################################################################# - PerformanceComparisonX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - PerformanceComparisonAarch: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison Aarch64 - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -############################ SQLLOGIC TEST ################################################### -############################################################################################## - SQLLogicTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Sqllogic test (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### SQL TEST ############################################### -############################################################################################## - SQLTest: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLTest - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -###################################### SQLANCER FUZZERS ###################################### -############################################################################################## - SQLancerTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - SQLancerTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} - needs: - - MarkReleaseReady - - FunctionalStatelessTestDebug - - FunctionalStatelessTestRelease - - FunctionalStatelessTestReleaseAnalyzerS3Replicated - - FunctionalStatelessTestAarch64 - - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan - - FunctionalStatelessTestUBsan - - 
FunctionalStatelessTestS3Debug - - FunctionalStatelessTestS3Tsan - - FunctionalStatefulTestDebug - - FunctionalStatefulTestRelease - - FunctionalStatefulTestAarch64 - - FunctionalStatefulTestAsan - - FunctionalStatefulTestTsan - - FunctionalStatefulTestMsan - - FunctionalStatefulTestUBsan - - FunctionalStatefulTestDebugParallelReplicas - - FunctionalStatefulTestUBsanParallelReplicas - - FunctionalStatefulTestMsanParallelReplicas - - FunctionalStatefulTestTsanParallelReplicas - - FunctionalStatefulTestAsanParallelReplicas - - FunctionalStatefulTestReleaseParallelReplicas - - StressTestDebug - - StressTestAsan - - StressTestTsan - - StressTestMsan - - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsAnalyzerAsan - - IntegrationTestsTsan - - IntegrationTestsRelease - - PerformanceComparisonX86 - - PerformanceComparisonAarch - - CompatibilityCheckX86 - - CompatibilityCheckAarch64 - - ASTFuzzerTestDebug - - ASTFuzzerTestAsan - - ASTFuzzerTestTsan - - ASTFuzzerTestMSan - - ASTFuzzerTestUBSan - - UnitTestsAsan - - UnitTestsTsan - - UnitTestsMsan - - UnitTestsUBsan - - UnitTestsReleaseClang - - SQLancerTestRelease - - SQLancerTestDebug - - SQLLogicTestRelease - - SQLTest - runs-on: [self-hosted, style-checker] + if: ${{ !cancelled() }} + needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - with: - clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml new file mode 100644 index 00000000000..c8b2452829b --- /dev/null +++ b/.github/workflows/merge_queue.yml @@ -0,0 +1,115 @@ +# yamllint disable rule:comments-indentation +name: MergeQueueCI + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + +on: # yamllint disable-line rule:truthy + merge_group: + +jobs: + RunConfig: + runs-on: [self-hosted, style-checker-aarch64] + outputs: + data: ${{ steps.runconfig.outputs.CI_DATA }} + steps: + - name: DebugInfo + uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get a version + filter: tree:0 + - name: Cancel PR workflow + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + echo "Testing the main ci directory" + python3 -m unittest discover -s . 
-p 'test_*.py' + for dir in *_lambda/; do + echo "Testing $dir" + python3 -m unittest discover -s "$dir" -p 'test_*.py' + done + - name: PrepareRunConfig + id: runconfig + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json + + echo "::group::CI configuration" + python3 -m json.tool ${{ runner.temp }}/ci_run_data.json + echo "::endgroup::" + + { + echo 'CI_DATA<> "$GITHUB_OUTPUT" + BuildDockers: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} + uses: ./.github/workflows/reusable_docker.yml + with: + data: ${{ needs.RunConfig.outputs.data }} + StyleCheck: + needs: [RunConfig, BuildDockers] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Style check')}} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Style check + runner_type: style-checker-aarch64 + run_command: | + python3 style_check.py + data: ${{ needs.RunConfig.outputs.data }} + secrets: + secret_envs: | + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_OUTPUT" - name: Re-create GH statuses for skipped jobs if any - if: ${{ github.event_name != 'merge_group' }} run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses BuildDockers: @@ -73,7 +81,7 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: Style check - runner_type: style-checker + runner_type: style-checker-aarch64 run_command: | python3 style_check.py data: ${{ needs.RunConfig.outputs.data }} @@ -83,7 +91,7 @@ jobs: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} RCSK FastTest: - needs: [RunConfig, BuildDockers] + needs: [RunConfig, BuildDockers, StyleCheck] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }} uses: ./.github/workflows/reusable_test.yml with: @@ -93,13 +101,13 @@ jobs: run_command: | python3 fast_test_check.py - ################################# Main statges ################################# + ################################# Main stages ################################# # for main CI chain # Builds_1: needs: [RunConfig, StyleCheck, FastTest] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_1 @@ -107,7 +115,6 @@ jobs: Tests_1: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_1 @@ -115,7 +122,6 @@ jobs: Builds_2: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_2 @@ -123,20 +129,25 @@ jobs: Tests_2: needs: [RunConfig, Builds_2] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} - # using callable 
wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} + # stage for jobs that do not prohibit merge + Tests_3: + needs: [RunConfig, Tests_1, Tests_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} + uses: ./.github/workflows/reusable_test_stage.yml + with: + stage: Tests_3 + data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# - # Reports should by run even if Builds_1/2 fail, so put them separatly in wf (not in Tests_1/2) + # Reports should by run even if Builds_1/2 fail, so put them separately in wf (not in Tests_1/2) Builds_1_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} - needs: - - RunConfig - - Builds_1 + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + needs: [RunConfig, StyleCheck, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check @@ -144,40 +155,43 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} Builds_2_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} - needs: - - RunConfig - - Builds_2 + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} + needs: [RunConfig, StyleCheck, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse special build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - ################################# Stage Final ################################# - # - FinishCheck: - if: ${{ !failure() && !cancelled() }} + CheckReadyForMerge: + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - - name: Check sync status - if: ${{ github.event_name == 'merge_group' }} + with: + filter: tree:0 + - name: Check and set merge status run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 sync_pr.py --status + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + + ################################# Stage Final ################################# + # + FinishCheck: + if: ${{ !cancelled() }} + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + filter: tree:0 - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }} - - name: Auto merge if approved - if: ${{ github.event_name != 
'merge_group' }} - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 merge_pr.py --check-approved - + python3 finish_check.py ############################################################################################# ###################################### JEPSEN TESTS ######################################### @@ -195,5 +209,5 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse Keeper Jepsen - runner_type: style-checker + runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} diff --git a/.github/workflows/pull_request_approved.yml b/.github/workflows/pull_request_approved.yml deleted file mode 100644 index 3de4978ad68..00000000000 --- a/.github/workflows/pull_request_approved.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: PullRequestApprovedCI - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -on: # yamllint disable-line rule:truthy - pull_request_review: - types: - - submitted - -jobs: - MergeOnApproval: - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: Merge approved PR - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 merge_pr.py --check-approved diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index 80d78d93e1b..5e254d785ec 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -33,6 +33,10 @@ name: Build ClickHouse additional_envs: description: additional ENV variables to setup the job type: string + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: Build: @@ -54,6 +58,7 @@ jobs: run: | cat >> "$GITHUB_ENV" << 'EOF' ${{inputs.additional_envs}} + ${{secrets.secret_envs}} DOCKER_TAG< 1 && format('-{0}',matrix.batch) || '' }} strategy: - fail-fast: false # we always wait for entire matrix + fail-fast: false # we always wait for the entire matrix matrix: batch: ${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].batches }} steps: diff --git a/.github/workflows/reusable_test_stage.yml b/.github/workflows/reusable_test_stage.yml index d7bd55fab43..8926b43d372 100644 --- a/.github/workflows/reusable_test_stage.yml +++ b/.github/workflows/reusable_test_stage.yml @@ -10,6 +10,10 @@ name: StageWF description: ci data type: string required: true + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: s: @@ -23,3 +27,5 @@ jobs: test_name: ${{ matrix.job_name_and_runner_type.job_name }} runner_type: ${{ matrix.job_name_and_runner_type.runner_type }} data: ${{ inputs.data }} + secrets: + secret_envs: ${{ secrets.secret_envs }} diff --git a/.gitignore b/.gitignore index db3f77d7d1e..4bc162c1b0f 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,9 @@ *.stderr *.stdout +# llvm-xray logs +xray-log.* + /docs/build /docs/publish /docs/edit diff --git a/.gitmessage b/.gitmessage deleted file mode 100644 index 89ee7d35d23..00000000000 --- a/.gitmessage +++ /dev/null @@ -1,29 +0,0 @@ - - -### CI modificators (add a leading space to apply) ### - -## To avoid a merge commit in CI: -#no_merge_commit - -## To discard CI cache: -#no_ci_cache - -## To not test (only style check): -#do_not_test - -## To run specified set of tests in CI: -#ci_set_ -#ci_set_reduced -#ci_set_arm -#ci_set_integration -#ci_set_old_analyzer - -## To run specified job in CI: -#job_ -#job_stateless_tests_release -#job_package_debug 
-#job_integration_tests_asan - -## To run only specified batches for multi-batch job(s) -#batch_2 -#batch_1_2_3 diff --git a/.gitmodules b/.gitmodules index fb72dc6e3ec..28696428e8c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,7 +6,7 @@ url = https://github.com/facebook/zstd [submodule "contrib/lz4"] path = contrib/lz4 - url = https://github.com/ClickHouse/lz4 + url = https://github.com/lz4/lz4 [submodule "contrib/librdkafka"] path = contrib/librdkafka url = https://github.com/ClickHouse/librdkafka diff --git a/CHANGELOG.md b/CHANGELOG.md index 207b88f7860..4891b79e4c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v24.5, 2024-05-30](#245)**
**[ClickHouse release v24.4, 2024-04-30](#244)**
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**
**[ClickHouse release v24.2, 2024-02-29](#242)**
@@ -7,6 +8,162 @@ # 2024 Changelog +### ClickHouse release 24.5, 2024-05-30 + +#### Backward Incompatible Change +* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)). +* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_error_prone_window_functions = 1` or set `compatibility = '24.4'` or lower. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)). +* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Adds the `Form` format to read/write a single record in the `application/x-www-form-urlencoded` format. [#60199](https://github.com/ClickHouse/ClickHouse/pull/60199) ([Shaun Struwig](https://github.com/Blargian)). +* Added possibility to compress in CROSS JOIN. [#60459](https://github.com/ClickHouse/ClickHouse/pull/60459) ([p1rattttt](https://github.com/p1rattttt)). +* Added possibility to do `CROSS JOIN` in temporary files if the size exceeds limits. [#63432](https://github.com/ClickHouse/ClickHouse/pull/63432) ([p1rattttt](https://github.com/p1rattttt)). +* Support join with inequal conditions which involve columns from both left and right table. e.g. `t1.y < t2.y`. To enable, `SET allow_experimental_join_condition = 1`. [#60920](https://github.com/ClickHouse/ClickHouse/pull/60920) ([lgbo](https://github.com/lgbo-ustc)). +* Maps can now have `Float32`, `Float64`, `Array(T)`, `Map(K, V)` and `Tuple(T1, T2, ...)` as keys. Closes [#54537](https://github.com/ClickHouse/ClickHouse/issues/54537). [#59318](https://github.com/ClickHouse/ClickHouse/pull/59318) ([李扬](https://github.com/taiyang-li)). +* Introduce bulk loading to `EmbeddedRocksDB` by creating and ingesting SST file instead of relying on rocksdb build-in memtable. This help to increase importing speed, especially for long-running insert query to StorageEmbeddedRocksDB tables. Also, introduce `EmbeddedRocksDB` table settings. [#59163](https://github.com/ClickHouse/ClickHouse/pull/59163) [#63324](https://github.com/ClickHouse/ClickHouse/pull/63324) ([Duc Canh Le](https://github.com/canhld94)). +* User can now parse CRLF with TSV format using a setting `input_format_tsv_crlf_end_of_line`. Closes [#56257](https://github.com/ClickHouse/ClickHouse/issues/56257). [#59747](https://github.com/ClickHouse/ClickHouse/pull/59747) ([Shaun Struwig](https://github.com/Blargian)). +* A new setting `input_format_force_null_for_omitted_fields` that forces NULL values for omitted fields. 
[#60887](https://github.com/ClickHouse/ClickHouse/pull/60887) ([Constantine Peresypkin](https://github.com/pkit)). +* Earlier our S3 storage and s3 table function didn't support selecting from archive container files, such as tarballs, zip, 7z. Now they allow to iterate over files inside archives in S3. [#62259](https://github.com/ClickHouse/ClickHouse/pull/62259) ([Daniil Ivanik](https://github.com/divanik)). +* Support for conditional function `clamp`. [#62377](https://github.com/ClickHouse/ClickHouse/pull/62377) ([skyoct](https://github.com/skyoct)). +* Add `NPy` output format. [#62430](https://github.com/ClickHouse/ClickHouse/pull/62430) ([豪肥肥](https://github.com/HowePa)). +* `Raw` format as a synonym for `TSVRaw`. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)). +* Added a new SQL function `generateUUIDv7` to generate version 7 UUIDs aka. timestamp-based UUIDs with random component. Also added a new function `UUIDToNum` to extract bytes from a UUID and a new function `UUIDv7ToDateTime` to extract timestamp component from a UUID version 7. [#62852](https://github.com/ClickHouse/ClickHouse/pull/62852) ([Alexey Petrunyaka](https://github.com/pet74alex)). +* On Linux and MacOS, if the program has stdout redirected to a file with a compression extension, use the corresponding compression method instead of nothing (making it behave similarly to `INTO OUTFILE`). [#63662](https://github.com/ClickHouse/ClickHouse/pull/63662) ([v01dXYZ](https://github.com/v01dXYZ)). +* Change warning on high number of attached tables to differentiate tables, views and dictionaries. [#64180](https://github.com/ClickHouse/ClickHouse/pull/64180) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Provide support for `azureBlobStorage` function in ClickHouse server to use Azure Workload identity to authenticate against Azure blob storage. If `use_workload_identity` parameter is set in config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication. [#57881](https://github.com/ClickHouse/ClickHouse/pull/57881) ([Vinay Suryadevara](https://github.com/vinay92-ch)). +* Add TTL information in the `system.parts_columns` table. [#63200](https://github.com/ClickHouse/ClickHouse/pull/63200) ([litlig](https://github.com/litlig)). + +#### Experimental Features +* Implement `Dynamic` data type that allows to store values of any type inside it without knowing all of them in advance. `Dynamic` type is available under a setting `allow_experimental_dynamic_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#63058](https://github.com/ClickHouse/ClickHouse/pull/63058) ([Kruglov Pavel](https://github.com/Avogar)). +* Allowed to create `MaterializedMySQL` database without connection to MySQL. [#63397](https://github.com/ClickHouse/ClickHouse/pull/63397) ([Kirill](https://github.com/kirillgarbar)). +* Automatically mark a replica of Replicated database as lost and start recovery if some DDL task fails more than `max_retries_before_automatic_recovery` (100 by default) times in a row with the same error. Also, fixed a bug that could cause skipping DDL entries when an exception is thrown during an early stage of entry execution. [#63549](https://github.com/ClickHouse/ClickHouse/pull/63549) ([Alexander Tokmakov](https://github.com/tavplubix)). 
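The `Dynamic` entry above names only the opt-in setting; here is a minimal sketch of how the type might be exercised (the table, the sample values, and the `dynamicType` helper are illustrative assumptions, not taken from the changelog):

```sql
-- Experimental in 24.5: opt in explicitly, as the entry above describes.
SET allow_experimental_dynamic_type = 1;

-- A single column that can hold values of different types per row.
CREATE TABLE dynamic_demo (d Dynamic) ENGINE = Memory;
INSERT INTO dynamic_demo VALUES (42), ('hello'), ([1, 2, 3]);

-- dynamicType() is assumed here to report the concrete type stored in each row.
SELECT d, dynamicType(d) FROM dynamic_demo;
```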
+* Account failed files in `s3queue_tracked_file_ttl_sec` and `s3queue_traked_files_limit` for `StorageS3Queue`. [#63638](https://github.com/ClickHouse/ClickHouse/pull/63638) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Performance Improvement +* Less contention in filesystem cache (part 4). Allow to keep filesystem cache not filled to the limit by doing additional eviction in the background (controlled by `keep_free_space_size(elements)_ratio`). This allows to release pressure from space reservation for queries (on `tryReserve` method). Also this is done in a lock free way as much as possible, e.g. should not block normal cache usage. [#61250](https://github.com/ClickHouse/ClickHouse/pull/61250) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skip merging of newly created projection blocks during `INSERT`-s. [#59405](https://github.com/ClickHouse/ClickHouse/pull/59405) ([Nikita Taranov](https://github.com/nickitat)). +* Process string functions `...UTF8` 'asciily' if input strings are all ascii chars. Inspired by https://github.com/apache/doris/pull/29799. Overall speed up by 1.07x~1.62x. Notice that peak memory usage had been decreased in some cases. [#61632](https://github.com/ClickHouse/ClickHouse/pull/61632) ([李扬](https://github.com/taiyang-li)). +* Improved performance of selection (`{}`) globs in StorageS3. [#62120](https://github.com/ClickHouse/ClickHouse/pull/62120) ([Andrey Zvonov](https://github.com/zvonand)). +* HostResolver has each IP address several times. If a remote host has several IPs and for some reason (firewall rules, for example) access is allowed on some IPs and forbidden on others, then only the first record of the forbidden IPs is marked as failed, and on each try these IPs have a chance to be chosen (and to fail again). Even if this is fixed, the DNS cache is dropped every 120 seconds, and the IPs can be chosen again. [#62652](https://github.com/ClickHouse/ClickHouse/pull/62652) ([Anton Ivashkin](https://github.com/ianton-ru)). +* Add a new configuration `prefer_merge_sort_block_bytes` to control the memory usage and speed up sorting 2 times when merging when there are many columns. [#62904](https://github.com/ClickHouse/ClickHouse/pull/62904) ([LiuNeng](https://github.com/liuneng1994)). +* `clickhouse-local` will start faster. In previous versions, it was not deleting temporary directories by mistake. Now it will. This closes [#62941](https://github.com/ClickHouse/ClickHouse/issues/62941). [#63074](https://github.com/ClickHouse/ClickHouse/pull/63074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Micro-optimizations for the new analyzer. [#63429](https://github.com/ClickHouse/ClickHouse/pull/63429) ([Raúl Marín](https://github.com/Algunenano)). +* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63443](https://github.com/ClickHouse/ClickHouse/pull/63443) [#63532](https://github.com/ClickHouse/ClickHouse/pull/63532) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up indices of type `set` a little (around 1.5 times) by removing garbage. [#64098](https://github.com/ClickHouse/ClickHouse/pull/64098) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove copying data when writing to the filesystem cache. [#63401](https://github.com/ClickHouse/ClickHouse/pull/63401) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Now backups with azure blob storage will use multicopy.
[#64116](https://github.com/ClickHouse/ClickHouse/pull/64116) ([alesapin](https://github.com/alesapin)). +* Allow to use native copy for azure even with different containers. [#64154](https://github.com/ClickHouse/ClickHouse/pull/64154) ([alesapin](https://github.com/alesapin)). +* Finally enable native copy for azure. [#64182](https://github.com/ClickHouse/ClickHouse/pull/64182) ([alesapin](https://github.com/alesapin)). + +#### Improvement +* Allow using `clickhouse-local` and its shortcuts `clickhouse` and `ch` with a query or queries file as a positional argument. Examples: `ch "SELECT 1"`, `ch --param_test Hello "SELECT {test:String}"`, `ch query.sql`. This closes [#62361](https://github.com/ClickHouse/ClickHouse/issues/62361). [#63081](https://github.com/ClickHouse/ClickHouse/pull/63081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable plain_rewritable metadata for local and Azure (azure_blob_storage) object storages. [#63365](https://github.com/ClickHouse/ClickHouse/pull/63365) ([Julia Kartseva](https://github.com/jkartseva)). +* Support English-style Unicode quotes, e.g. “Hello”, ‘world’. This is questionable in general but helpful when you type your query in a word processor, such as Google Docs. This closes [#58634](https://github.com/ClickHouse/ClickHouse/issues/58634). [#63381](https://github.com/ClickHouse/ClickHouse/pull/63381) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow trailing commas in the columns list in the INSERT query. For example, `INSERT INTO test (a, b, c, ) VALUES ...`. [#63803](https://github.com/ClickHouse/ClickHouse/pull/63803) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better exception messages for the `Regexp` format. [#63804](https://github.com/ClickHouse/ClickHouse/pull/63804) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow trailing commas in the `Values` format. For example, this query is allowed: `INSERT INTO test (a, b, c) VALUES (4, 5, 6,);`. [#63810](https://github.com/ClickHouse/ClickHouse/pull/63810) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix a crash in asynchronous stack unwinding (such as when using the sampling query profiler) while interpreting debug info. This closes [#60460](https://github.com/ClickHouse/ClickHouse/issues/60460). [#60468](https://github.com/ClickHouse/ClickHouse/pull/60468) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Distinct messages for s3 error 'no key' for cases disk and storage. [#61108](https://github.com/ClickHouse/ClickHouse/pull/61108) ([Sema Checherinda](https://github.com/CheSema)). +* The progress bar will work for trivial queries with LIMIT from `system.zeros`, `system.zeros_mt` (it already works for `system.numbers` and `system.numbers_mt`), and the `generateRandom` table function. As a bonus, if the total number of records is greater than the `max_rows_to_read` limit, it will throw an exception earlier. This closes [#58183](https://github.com/ClickHouse/ClickHouse/issues/58183). [#61823](https://github.com/ClickHouse/ClickHouse/pull/61823) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for "Merge Key" in YAML configurations (this is a weird feature of YAML, please never mind). 
[#62685](https://github.com/ClickHouse/ClickHouse/pull/62685) ([Azat Khuzhin](https://github.com/azat)). +* Enhance error message when a non-deterministic function is used with a Replicated source. [#62896](https://github.com/ClickHouse/ClickHouse/pull/62896) ([Grégoire Pineau](https://github.com/lyrixx)). +* Fix interserver secret for Distributed over Distributed from `remote`. [#63013](https://github.com/ClickHouse/ClickHouse/pull/63013) ([Azat Khuzhin](https://github.com/azat)). +* Support `include_from` for YAML files. However, it is better to use `config.d`. [#63106](https://github.com/ClickHouse/ClickHouse/pull/63106) ([Eduard Karacharov](https://github.com/korowa)). +* Keep previous data in terminal after picking from skim suggestions. [#63261](https://github.com/ClickHouse/ClickHouse/pull/63261) ([FlameFactory](https://github.com/FlameFactory)). +* Width of fields (in Pretty formats or the `visibleWidth` function) now correctly ignores ANSI escape sequences. [#63270](https://github.com/ClickHouse/ClickHouse/pull/63270) ([Shaun Struwig](https://github.com/Blargian)). +* Update the usage of error code `NUMBER_OF_ARGUMENTS_DOESNT_MATCH` by more accurate error codes when appropriate. [#63406](https://github.com/ClickHouse/ClickHouse/pull/63406) ([Yohann Jardin](https://github.com/yohannj)). +* `os_user` and `client_hostname` are now correctly set up for queries for command line suggestions in clickhouse-client. This closes [#63430](https://github.com/ClickHouse/ClickHouse/issues/63430). [#63433](https://github.com/ClickHouse/ClickHouse/pull/63433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Automatically correct `max_block_size` to the default value if it is zero. [#63587](https://github.com/ClickHouse/ClickHouse/pull/63587) ([Antonio Andelic](https://github.com/antonio2368)). +* Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. This is to address [#52086](https://github.com/ClickHouse/ClickHouse/issues/52086). [#63656](https://github.com/ClickHouse/ClickHouse/pull/63656) ([Zimu Li](https://github.com/woodlzm)). +* Enable truncate operation for object storage disks. [#63693](https://github.com/ClickHouse/ClickHouse/pull/63693) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* The loading of the keywords list is now dependent on the server revision and will be disabled for the old versions of ClickHouse server. CC @azat. [#63786](https://github.com/ClickHouse/ClickHouse/pull/63786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* ClickHouse disks have to read a server setting to obtain the actual metadata format version. [#63831](https://github.com/ClickHouse/ClickHouse/pull/63831) ([Sema Checherinda](https://github.com/CheSema)). +* Disable pretty format restrictions (`output_format_pretty_max_rows`/`output_format_pretty_max_value_width`) when stdout is not a TTY. [#63942](https://github.com/ClickHouse/ClickHouse/pull/63942) ([Azat Khuzhin](https://github.com/azat)). +* Exception handling now works when ClickHouse is used inside AWS Lambda. Author: [Alexey Coolnev](https://github.com/acoolnev). [#64014](https://github.com/ClickHouse/ClickHouse/pull/64014) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Throw `CANNOT_DECOMPRESS` instead of `CORRUPTED_DATA` on invalid compressed data passed via HTTP. [#64036](https://github.com/ClickHouse/ClickHouse/pull/64036) ([vdimir](https://github.com/vdimir)). +* A tip for a single large number in Pretty formats now works for Nullable and LowCardinality.
This closes [#61993](https://github.com/ClickHouse/ClickHouse/issues/61993). [#64084](https://github.com/ClickHouse/ClickHouse/pull/64084) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add metrics, logs, and thread names around parts filtering with indices. [#64130](https://github.com/ClickHouse/ClickHouse/pull/64130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ignore `allow_suspicious_primary_key` on `ATTACH` and verify on `ALTER`. [#64202](https://github.com/ClickHouse/ClickHouse/pull/64202) ([Azat Khuzhin](https://github.com/azat)). + +#### Build/Testing/Packaging Improvement +* ClickHouse is built with clang-18. A lot of new checks from clang-tidy-18 have been enabled. [#60469](https://github.com/ClickHouse/ClickHouse/pull/60469) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Experimentally support loongarch64 as a new platform for ClickHouse. [#63733](https://github.com/ClickHouse/ClickHouse/pull/63733) ([qiangxuhui](https://github.com/qiangxuhui)). +* The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Information about every symbol in every translation unit will be collected in the CI database for every build in the CI. This closes [#63494](https://github.com/ClickHouse/ClickHouse/issues/63494). [#63495](https://github.com/ClickHouse/ClickHouse/pull/63495) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update Apache Datasketches library. It resolves [#63858](https://github.com/ClickHouse/ClickHouse/issues/63858). [#63923](https://github.com/ClickHouse/ClickHouse/pull/63923) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable GRPC support for aarch64 linux while cross-compiling binary. [#64072](https://github.com/ClickHouse/ClickHouse/pull/64072) ([alesapin](https://github.com/alesapin)). +* Fix unwind on SIGSEGV on aarch64 (due to small stack for signal) [#64058](https://github.com/ClickHouse/ClickHouse/pull/64058) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix making backup when multiple shards are used [#57684](https://github.com/ClickHouse/ClickHouse/pull/57684) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix passing projections/indexes/primary key from columns list from CREATE query into inner table of MV [#59183](https://github.com/ClickHouse/ClickHouse/pull/59183) ([Azat Khuzhin](https://github.com/azat)). +* Fix boundRatio incorrect merge [#60532](https://github.com/ClickHouse/ClickHouse/pull/60532) ([Tao Wang](https://github.com/wangtZJU)). +* Fix crash when calling some functions on const low-cardinality columns [#61966](https://github.com/ClickHouse/ClickHouse/pull/61966) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix queries with FINAL give wrong result when table does not use adaptive granularity [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)). +* Improve detection of cgroups v2 support for memory controllers [#62903](https://github.com/ClickHouse/ClickHouse/pull/62903) ([Robert Schulze](https://github.com/rschu1ze)). 
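The first Bug Fix entry above disables `enable_vertical_final` by default because of the linked bug; as a hedged sketch, a user on an affected build could also opt out explicitly (the table name is illustrative):

```sql
-- 24.5 already ships with the optimization off by default; setting it explicitly
-- only matters on builds where it was still enabled (see the entry above).
SET enable_vertical_final = 0;

-- FINAL on a ReplacingMergeTree-style table is where the optimization applied.
SELECT * FROM events FINAL;
```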
+* Fix subsequent use of external tables in client [#62964](https://github.com/ClickHouse/ClickHouse/pull/62964) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash with untuple and unresolved lambda [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)). +* Fix premature server listen for connections [#63181](https://github.com/ClickHouse/ClickHouse/pull/63181) ([alesapin](https://github.com/alesapin)). +* Fix intersecting parts when restarting after a DROP PART command [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)). +* Correctly load SQL security defaults during startup [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)). +* JOIN filter push down filter join fix [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix infinite loop in AzureObjectStorage::listObjects [#63257](https://github.com/ClickHouse/ClickHouse/pull/63257) ([Julia Kartseva](https://github.com/jkartseva)). +* CROSS join ignore join_algorithm setting [#63273](https://github.com/ClickHouse/ClickHouse/pull/63273) ([vdimir](https://github.com/vdimir)). +* Fix finalize WriteBufferToFileSegment and StatusFile [#63346](https://github.com/ClickHouse/ClickHouse/pull/63346) ([vdimir](https://github.com/vdimir)). +* Fix logical error during SELECT query after ALTER in rare case [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)). +* Fix `X-ClickHouse-Timezone` header with `session_timezone` [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix debug assert when using grouping WITH ROLLUP and LowCardinality types [#63398](https://github.com/ClickHouse/ClickHouse/pull/63398) ([Raúl Marín](https://github.com/Algunenano)). +* Small fixes for group_by_use_nulls [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) ([vdimir](https://github.com/vdimir)). +* Fix backup/restore of projection part in case projection was removed from table metadata, but part still has projection [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix mysql dictionary source [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)). +* Insert QueryFinish on AsyncInsertFlush with no data [#63483](https://github.com/ClickHouse/ClickHouse/pull/63483) ([Raúl Marín](https://github.com/Algunenano)). +* Fix: empty used_dictionaries in system.query_log [#63487](https://github.com/ClickHouse/ClickHouse/pull/63487) ([Eduard Karacharov](https://github.com/korowa)). +* Make `MergeTreePrefetchedReadPool` safer [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash on exit with sentry enabled (due to openssl destroyed before sentry) [#63548](https://github.com/ClickHouse/ClickHouse/pull/63548) ([Azat Khuzhin](https://github.com/azat)). +* Fix Array and Map support with Keyed hashing [#63628](https://github.com/ClickHouse/ClickHouse/pull/63628) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix filter pushdown for Parquet and maybe StorageMerge [#63642](https://github.com/ClickHouse/ClickHouse/pull/63642) ([Michael Kolupaev](https://github.com/al13n321)). 
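Among the fixes above, the `X-ClickHouse-Timezone` one concerns the HTTP response header following `session_timezone`; a rough sketch of the scenario, assuming the standard `timeZone()` function:

```sql
-- With the fix, the X-ClickHouse-Timezone header of an HTTP response is expected
-- to reflect the session override rather than the server default.
SET session_timezone = 'Europe/Amsterdam';
SELECT timeZone();  -- expected to return the session value set above
```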
+* Prevent conversion to Replicated if zookeeper path already exists [#63670](https://github.com/ClickHouse/ClickHouse/pull/63670) ([Kirill](https://github.com/kirillgarbar)). +* Analyzer: views read only necessary columns [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer: Forbid WINDOW redefinition [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)). +* flatten_nested was broken with the experimental Replicated database. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix [#63653](https://github.com/ClickHouse/ClickHouse/issues/63653) [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow cast from Array(Nothing) to Map(Nothing, Nothing) [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix ILLEGAL_COLUMN in partial_merge join [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)). +* Fix: remove redundant distinct with window functions [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix possible crash with SYSTEM UNLOAD PRIMARY KEY [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a query with duplicating cycling alias. [#63791](https://github.com/ClickHouse/ClickHouse/pull/63791) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make `TokenIterator` lazy as it should be [#63801](https://github.com/ClickHouse/ClickHouse/pull/63801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `endpoint_subpath` S3 URI setting [#63806](https://github.com/ClickHouse/ClickHouse/pull/63806) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix deadlock in `ParallelReadBuffer` [#63814](https://github.com/ClickHouse/ClickHouse/pull/63814) ([Antonio Andelic](https://github.com/antonio2368)). +* JOIN filter push down equivalent columns fix [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)). +* Remove data from all disks after DROP with Lazy database. [#63848](https://github.com/ClickHouse/ClickHouse/pull/63848) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix incorrect result when reading from MV with parallel replicas and new analyzer [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)). +* Fixes in `find_super_nodes` and `find_big_family` command of keeper-client [#63862](https://github.com/ClickHouse/ClickHouse/pull/63862) ([Alexander Gololobov](https://github.com/davenger)). +* Update lambda execution name [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV due to CPU/Real profiler [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)). +* Fix `EXPLAIN CURRENT TRANSACTION` query [#63926](https://github.com/ClickHouse/ClickHouse/pull/63926) ([Anton Popov](https://github.com/CurtizJ)). +* Fix analyzer: there's turtles all the way down... [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
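One of the crash fixes above involves `SYSTEM UNLOAD PRIMARY KEY`; as an assumption-laden sketch, the statement takes an optional table target (the names below are illustrative):

```sql
-- Unload the in-memory primary key index, either for one table or for all tables.
SYSTEM UNLOAD PRIMARY KEY db.events;
SYSTEM UNLOAD PRIMARY KEY;
```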
+* Allow certain ALTER TABLE commands for `plain_rewritable` disk [#63933](https://github.com/ClickHouse/ClickHouse/pull/63933) ([Julia Kartseva](https://github.com/jkartseva)). +* Recursive CTE distributed fix [#63939](https://github.com/ClickHouse/ClickHouse/pull/63939) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer: Fix COLUMNS resolve [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)). +* LIMIT BY and skip_unused_shards with analyzer [#63983](https://github.com/ClickHouse/ClickHouse/pull/63983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* A fix for some trash (experimental Kusto) [#63992](https://github.com/ClickHouse/ClickHouse/pull/63992) ([Yong Wang](https://github.com/kashwy)). +* Deserialize untrusted binary inputs in a safer way [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix query analysis for queries with the setting `final` = 1 for Distributed tables over tables from other than the MergeTree family. [#64037](https://github.com/ClickHouse/ClickHouse/pull/64037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add missing settings to recoverLostReplica [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)). +* Fix SQL security access checks with analyzer [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Fix analyzer: only interpolate expression should be used for DAG [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix azure backup writing multipart blocks by 1 MiB (read buffer size) instead of `max_upload_part_size` (in non-native copy case) [#64117](https://github.com/ClickHouse/ClickHouse/pull/64117) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly fallback during backup copy [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* Prevent LOGICAL_ERROR on CREATE TABLE as Materialized View [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)). +* Query Cache: Consider identical queries against different databases as different [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Ignore `text_log` for Keeper [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Logical error: Bad cast for Buffer table with prewhere. [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + + ### ClickHouse release 24.4, 2024-04-30 #### Upgrade Notes @@ -506,7 +663,7 @@ * Improve the operation of `sumMapFiltered` with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. [#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)). * The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. 
Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * `Kusto` dialect is disabled until these two bugs will be fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Any attempt to use `Kusto` will result in exception. -* More efficient implementation of the `FINAL` modifier no longer guarantees preserving the order even if `max_threads = 1`. If you counted on the previous behavior, set `enable_vertical_final` to 0 or `compatibility` to `23.12`. +* More efficient implementation of the `FINAL` modifier no longer guarantees preserving the order even if `max_threads = 1`. If you counted on the previous behavior, set `enable_vertical_final` to 0 or `compatibility` to `23.12`. #### New Feature * Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/CMakeLists.txt b/CMakeLists.txt index be804a14765..601cbe7201c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,13 +61,16 @@ if (ENABLE_CHECK_HEAVY_BUILDS) # set CPU time limit to 1000 seconds set (RLIMIT_CPU 1000) - # Sanitizers are too heavy - if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE) - set (RLIMIT_DATA 10000000000) # 10G + # Sanitizers are too heavy. Some architectures too. + if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE OR ARCH_RISCV64 OR ARCH_LOONGARCH64) + # Twice as large + set (RLIMIT_DATA 10000000000) + set (RLIMIT_AS 20000000000) endif() - # For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file - if (ARCH_RISCV64) + # For some files currently building RISCV64/LOONGARCH64 might be too slow. + # TODO: Improve compilation times per file + if (ARCH_RISCV64 OR ARCH_LOONGARCH64) set (RLIMIT_CPU 1800) endif() @@ -119,6 +122,8 @@ add_library(global-libs INTERFACE) include (cmake/sanitize.cmake) +include (cmake/instrument.cmake) + option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) set (CMAKE_COLOR_MAKEFILE ${ENABLE_COLORED_BUILD}) # works only for the makefile generator @@ -135,23 +140,21 @@ endif () include (cmake/check_flags.cmake) include (cmake/add_warning.cmake) -if (COMPILER_CLANG) - # generate ranges for fast "addr2line" search - if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") - # NOTE: that clang has a bug because of it does not emit .debug_aranges - # with ThinLTO, so custom ld.lld wrapper is shipped in docker images. 
- set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") - endif () - - # See https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") - endif() - - no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16 +# generate ranges for fast "addr2line" search +if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") + # NOTE: that clang has a bug because of it does not emit .debug_aranges + # with ThinLTO, so custom ld.lld wrapper is shipped in docker images. + set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") endif () +# See https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ +if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") +endif() + +no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16 + option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF) @@ -207,8 +210,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS "Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)" ${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT}) -option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) - option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF) if (NOT BUILD_STANDALONE_KEEPER) option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON) @@ -284,16 +285,12 @@ endif () option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF) if (ENABLE_BUILD_PROFILING) - if (COMPILER_CLANG) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") - if (LINKER_NAME MATCHES "lld") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--time-trace") - set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--time-trace") - endif () - else () - message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with CLang") - endif () + if (LINKER_NAME MATCHES "lld") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--time-trace") + set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--time-trace") + endif () endif () set (CMAKE_CXX_STANDARD 23) @@ -304,22 +301,20 @@ set (CMAKE_C_STANDARD 11) set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C set (CMAKE_C_STANDARD_REQUIRED ON) -if (COMPILER_CLANG) - # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. - # See https://reviews.llvm.org/D112921 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") +# Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. +# See https://reviews.llvm.org/D112921 +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") - # falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable - # benchmarks. 
- set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") +# falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable +# benchmarks. +set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") - if (ARCH_AMD64) - # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change, - # which makes benchmark results more stable. - set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries") - set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}") - endif() -endif () +if (ARCH_AMD64) + # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change, + # which makes benchmark results more stable. + set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries") + set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}") +endif() # Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off") @@ -348,39 +343,34 @@ set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} $ set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") -if (COMPILER_CLANG) - if (OS_DARWIN) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main") - endif() +if (OS_DARWIN) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main") +endif() - # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") +# Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead. +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") - if (NOT ENABLE_TESTS AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX) - # https://clang.llvm.org/docs/ThinLTO.html - # Applies to clang and linux only. - # Disabled when building with tests or sanitizers. - option(ENABLE_THINLTO "Clang-specific link time optimization" ON) - endif() +if (NOT ENABLE_TESTS AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX) + # https://clang.llvm.org/docs/ThinLTO.html + # Applies to clang and linux only. + # Disabled when building with tests or sanitizers. + option(ENABLE_THINLTO "Clang-specific link time optimization" ON) +endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers") - - # We cannot afford to use LTO when compiling unit tests, and it's not enough - # to only supply -fno-lto at the final linking stage. So we disable it - # completely. 
- if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE) - # Link time optimization - set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables") - set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables") - set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables") - elseif (ENABLE_THINLTO) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO") - endif () +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers") +# We cannot afford to use LTO when compiling unit tests, and it's not enough +# to only supply -fno-lto at the final linking stage. So we disable it +# completely. +if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE) + # Link time optimization + set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables") + set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables") + set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables") elseif (ENABLE_THINLTO) - message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with Clang") + message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO") endif () # Turns on all external libs like s3, kafka, ODBC, ... diff --git a/PreLoad.cmake b/PreLoad.cmake index 4879e721ae3..e0fd37b2fd6 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -93,6 +93,8 @@ if (OS MATCHES "Linux" set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "") elseif (ARCH MATCHES "^(s390x.*|S390X.*)") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-s390x.cmake" CACHE INTERNAL "") + elseif (ARCH MATCHES "^(loongarch64.*|LOONGARCH64.*)") + set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-loongarch64.cmake" CACHE INTERNAL "") else () message (FATAL_ERROR "Unsupported architecture: ${ARCH}") endif () diff --git a/README.md b/README.md index d7202454332..73d989210b5 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,6 @@ Every month we get together with the community (users, contributors, customers, Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/300405581/) - May 4 * [ClickHouse Happy Hour @ Tom's Watch Bar - Los Angeles](https://www.meetup.com/clickhouse-los-angeles-user-group/events/300740584/) - May 22 * [ClickHouse & Confluent Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28 * [ClickHouse Meetup in Stockholm](https://www.meetup.com/clickhouse-stockholm-user-group/events/299752651/) - Jun 3 @@ -49,6 +48,7 @@ Keep an eye out for upcoming meetups and events around the world. 
Somewhere else * [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 +* [ClickHouse Meetup @ Klaviyo - Boston](https://www.meetup.com/clickhouse-boston-user-group/events/300907870) - Jul 11 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" diff --git a/SECURITY.md b/SECURITY.md index 14c39129db9..8635951dc0e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,22 +2,27 @@ the file is autogenerated by utils/security-generator/generate_security.py --> -# Security Policy +# ClickHouse Security Vulnerability Response Policy -## Security Announcements -Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). +## Security Change Log and Support -## Scope and Supported Versions +Details regarding security fixes are publicly reported in our [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). A summary of known security vulnerabilities is shown at the bottom of this page. -The following versions of ClickHouse server are currently being supported with security updates: +Vulnerability notifications pre-release or during embargo periods are available to open source users and support customers registered for vulnerability alerts. Refer to our [Embargo Policy](#embargo-policy) below. + +The following versions of ClickHouse server are currently supported with security updates: | Version | Supported | |:-|:-| +| 24.5 | ✔️ | | 24.4 | ✔️ | | 24.3 | ✔️ | -| 24.2 | ✔️ | +| 24.2 | ❌ | | 24.1 | ❌ | -| 23.* | ❌ | +| 23.12 | ❌ | +| 23.11 | ❌ | +| 23.10 | ❌ | +| 23.9 | ❌ | | 23.8 | ✔️ | | 23.7 | ❌ | | 23.6 | ❌ | @@ -37,7 +42,7 @@ The following versions of ClickHouse server are currently being supported with s We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. -To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement. +To report a potential vulnerability in ClickHouse please send the details about it through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement. ### When Should I Report a Vulnerability? @@ -59,3 +64,21 @@ As the security issue moves from triage, to identified fix, to release planning A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. 
We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. +## Embargo Policy + +Open source users and support customers may subscribe to receive alerts during the embargo period by visiting [https://trust.clickhouse.com/?product=clickhouseoss](https://trust.clickhouse.com/?product=clickhouseoss), requesting access and subscribing for alerts. Subscribers agree not to make these notifications public, issue communications, share this information with others, or issue public patches before the disclosure date. Accidental disclosures must be reported immediately to trust@clickhouse.com. Failure to follow this policy or repeated leaks may result in removal from the subscriber list. + +Participation criteria: +1. Be a current open source user or support customer with a valid corporate email domain (no @gmail.com, @azure.com, etc.). +1. Sign up to the ClickHouse OSS Trust Center at [https://trust.clickhouse.com](https://trust.clickhouse.com). +1. Accept the ClickHouse Security Vulnerability Response Policy as outlined above. +1. Subscribe to ClickHouse OSS Trust Center alerts. + +Removal criteria: +1. Members may be removed for failure to follow this policy or repeated leaks. +1. Members may be removed for bounced messages (mail delivery failure). +1. Members may unsubscribe at any time. + +Notification process: +ClickHouse will post notifications within our OSS Trust Center and notify subscribers. Subscribers must log in to the Trust Center to download the notification. The notification will include the timeframe for public disclosure. + diff --git a/base/base/BorrowedObjectPool.h b/base/base/BorrowedObjectPool.h index 05a23d5835e..f5ef28582b2 100644 --- a/base/base/BorrowedObjectPool.h +++ b/base/base/BorrowedObjectPool.h @@ -86,7 +86,7 @@ public: } /// Return object into pool. Client must return same object that was borrowed. - inline void returnObject(T && object_to_return) + void returnObject(T && object_to_return) { { std::lock_guard lock(objects_mutex); @@ -99,20 +99,20 @@ public: } /// Max pool size - inline size_t maxSize() const + size_t maxSize() const { return max_size; } /// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full. - inline size_t allocatedObjectsSize() const + size_t allocatedObjectsSize() const { std::lock_guard lock(objects_mutex); return allocated_objects_size; } /// Returns allocatedObjectsSize == maxSize - inline bool isFull() const + bool isFull() const { std::lock_guard lock(objects_mutex); return allocated_objects_size == max_size; @@ -120,7 +120,7 @@ public: /// Borrowed objects size. If borrowedObjectsSize == allocatedObjectsSize and pool is full. /// Then client will wait during borrowObject function call. 
- inline size_t borrowedObjectsSize() const + size_t borrowedObjectsSize() const { std::lock_guard lock(objects_mutex); return borrowed_objects_size; @@ -129,7 +129,7 @@ public: private: template - inline T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) + T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) { ++allocated_objects_size; ++borrowed_objects_size; @@ -137,7 +137,7 @@ private: return std::forward(func)(); } - inline T borrowFromObjects(const std::unique_lock &) + T borrowFromObjects(const std::unique_lock &) { T dst; detail::moveOrCopyIfThrow(std::move(objects.back()), dst); diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 27aa0bd6baf..159502c9735 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -34,15 +34,6 @@ set (SRCS throwError.cpp ) -if (USE_DEBUG_HELPERS) - get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES) - # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. - # Prefixing "SHELL:" will force it to use the original text. - set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/base/base/iostream_debug_helpers.h\"") - # Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system. - add_compile_options($<$:${INCLUDE_DEBUG_HELPERS}>) -endif () - add_library (common ${SRCS}) if (WITH_COVERAGE) diff --git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h index beb228cea3c..a11e13a479b 100644 --- a/base/base/Decimal_fwd.h +++ b/base/base/Decimal_fwd.h @@ -44,6 +44,10 @@ concept is_over_big_int = || std::is_same_v || std::is_same_v || std::is_same_v; + +template +concept is_over_big_decimal = is_decimal && is_over_big_int; + } template <> struct is_signed { static constexpr bool value = true; }; diff --git a/base/base/DecomposedFloat.h b/base/base/DecomposedFloat.h index f152637b94e..4837782621e 100644 --- a/base/base/DecomposedFloat.h +++ b/base/base/DecomposedFloat.h @@ -51,11 +51,9 @@ struct DecomposedFloat /// Returns 0 for both +0. and -0. int sign() const { - return (exponent() == 0 && mantissa() == 0) - ? 0 - : (isNegative() - ? -1 - : 1); + if (exponent() == 0 && mantissa() == 0) + return 0; + return isNegative() ? 
-1 : 1; } uint16_t exponent() const diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h index e3208f16a75..4a9de4d17a3 100644 --- a/base/base/EnumReflection.h +++ b/base/base/EnumReflection.h @@ -11,7 +11,7 @@ namespace detail template constexpr void static_for(F && f, std::index_sequence) { - (std::forward(f)(std::integral_constant(I)>()) , ...); + (f(std::integral_constant(I)>()) , ...); } } diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 9da059c98b6..a68b6663e50 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -651,7 +651,9 @@ std::string_view JSON::getRawString() const Pos s = ptr_begin; if (*s != '"') throw JSONException(std::string("JSON: expected \", got ") + *s); - while (++s != ptr_end && *s != '"'); + ++s; + while (s != ptr_end && *s != '"') + ++s; if (s != ptr_end) return std::string_view(ptr_begin + 1, s - ptr_begin - 1); throw JSONException("JSON: incorrect syntax (expected end of string, found end of JSON)."); diff --git a/base/base/JSON.h b/base/base/JSON.h index bc053670a96..7b9acf11d9a 100644 --- a/base/base/JSON.h +++ b/base/base/JSON.h @@ -74,7 +74,7 @@ public: const char * data() const { return ptr_begin; } const char * dataEnd() const { return ptr_end; } - enum ElementType + enum ElementType : uint8_t { TYPE_OBJECT, TYPE_ARRAY, diff --git a/base/base/TypeList.h b/base/base/TypeList.h index 310f0c0c586..ebbe1b48b29 100644 --- a/base/base/TypeList.h +++ b/base/base/TypeList.h @@ -27,7 +27,7 @@ namespace TypeListUtils /// In some contexts it's more handy to use functions in constexpr Root changeRoot(TypeList) { return {}; } template - constexpr void forEach(TypeList, F && f) { (std::forward(f)(TypeList{}), ...); } + constexpr void forEach(TypeList, F && f) { (f(TypeList{}), ...); } } template diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index bea2e99fa51..f20b9daf22e 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -9,11 +9,18 @@ bool cgroupsV2Enabled() { #if defined(OS_LINUX) - /// This file exists iff the host has cgroups v2 enabled. - auto controllers_file = default_cgroups_mount / "cgroup.controllers"; - if (!std::filesystem::exists(controllers_file)) - return false; - return true; + try + { + /// This file exists iff the host has cgroups v2 enabled. 
+ auto controllers_file = default_cgroups_mount / "cgroup.controllers"; + if (!std::filesystem::exists(controllers_file)) + return false; + return true; + } + catch (const std::filesystem::filesystem_error &) /// all "underlying OS API errors", typically: permission denied + { + return false; /// not logging the exception as most callers fall back to cgroups v1 + } #else return false; #endif diff --git a/base/base/constexpr_helpers.h b/base/base/constexpr_helpers.h index 2dad106a7a3..13bb5d85a56 100644 --- a/base/base/constexpr_helpers.h +++ b/base/base/constexpr_helpers.h @@ -21,7 +21,7 @@ bool func_wrapper(Func && func, Arg && arg) template constexpr bool static_for_impl(Func && f, std::integer_sequence) { - return (func_wrapper(std::forward(f), std::integral_constant{}) || ...); + return (func_wrapper(f, std::integral_constant{}) || ...); } template diff --git a/base/base/defines.h b/base/base/defines.h index 627c50c27d2..2fc54c37bde 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -28,8 +28,8 @@ #define NO_INLINE __attribute__((__noinline__)) #define MAY_ALIAS __attribute__((__may_alias__)) -#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) && !defined(__s390x__) && !(defined(__riscv) && (__riscv_xlen == 64)) -# error "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress), s390x (work in progress) and RISC-V 64 (experimental)" +#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) && !defined(__s390x__) && !(defined(__loongarch64)) && !(defined(__riscv) && (__riscv_xlen == 64)) +# error "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress), s390x (work in progress), loongarch64 (experimental) and RISC-V 64 (experimental)" #endif /// Check for presence of address sanitizer diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index fda94edaa88..30ee759ba33 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -147,7 +147,7 @@ constexpr uint16_t maybe_negate(uint16_t x) return ~x; } -enum class ReturnMode +enum class ReturnMode : uint8_t { End, Nullptr, diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index 3d01e301f45..f47cba9833d 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -77,8 +77,7 @@ uint64_t getMemoryAmountOrZero() { uint64_t limit_v1; if (limit_file_v1 >> limit_v1) - if (limit_v1 < memory_amount) - memory_amount = limit_v1; + memory_amount = std::min(memory_amount, limit_v1); } } diff --git a/base/base/hex.h b/base/base/hex.h index 931f220aa08..5e88ce76386 100644 --- a/base/base/hex.h +++ b/base/base/hex.h @@ -146,7 +146,7 @@ namespace impl TUInt res; if constexpr (sizeof(TUInt) == 1) { - res = static_cast(unhexDigit(data[0])) * 0x10 + static_cast(unhexDigit(data[1])); + res = unhexDigit(data[0]) * 0x10 + unhexDigit(data[1]); } else if constexpr (sizeof(TUInt) == 2) { @@ -176,17 +176,19 @@ namespace impl }; /// Helper template class to convert a value of any supported type to hexadecimal representation and back. - template + template struct HexConversion; template - struct HexConversion>> : public HexConversionUInt {}; + requires(std::is_integral_v) + struct HexConversion : public HexConversionUInt {}; template struct HexConversion> : public HexConversionUInt> {}; template /// Partial specialization here allows not to include in this header. 
- struct HexConversion>> + requires(std::is_same_v) + struct HexConversion { static const constexpr size_t num_hex_digits = 32; diff --git a/base/base/iostream_debug_helpers.h b/base/base/iostream_debug_helpers.h deleted file mode 100644 index 5c601251272..00000000000 --- a/base/base/iostream_debug_helpers.h +++ /dev/null @@ -1,183 +0,0 @@ -#pragma once - -#include "demangle.h" -#include "getThreadId.h" -#include -#include -#include -#include -#include - -/** Usage: - * - * DUMP(variable...) - */ - - -template -Out & dumpValue(Out &, T &&); - - -/// Catch-all case. -template -std::enable_if_t & dumpImpl(Out & out, T &&) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return out << "{...}"; -} - -/// An object, that could be output with operator <<. -template -std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t() << std::declval())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return out << x; -} - -/// A pointer-like object. -template -std::enable_if_t, std::decay_t())>> - , Out> & dumpImpl(Out & out, T && x, std::decay_t())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - if (!x) - return out << "nullptr"; - return dumpValue(out, *x); -} - -/// Container. -template -std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - bool first = true; - out << "{"; - for (const auto & elem : x) - { - if (first) - first = false; - else - out << ", "; - dumpValue(out, elem); - } - return out << "}"; -} - - -template -std::enable_if_t>, Out> & -dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return out << magic_enum::enum_name(x); -} - -/// string and const char * - output not as container or pointer. - -template -std::enable_if_t, std::string> || std::is_same_v, const char *>), Out> & -dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return out << std::quoted(x); -} - -/// UInt8 - output as number, not char. - -template -std::enable_if_t, unsigned char>, Out> & -dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return out << int(x); -} - - -/// Tuple, pair -template -Out & dumpTupleImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - if constexpr (N == 0) - out << "{"; - else - out << ", "; - - dumpValue(out, std::get(x)); - - if constexpr (N + 1 == std::tuple_size_v>) - out << "}"; - else - dumpTupleImpl(out, x); - - return out; -} - -template -std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t(std::declval()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return dumpTupleImpl<0>(out, x); -} - - -template -Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t(std::declval(), std::declval()))> *) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return dumpImpl(out, x); -} - -// NOLINTNEXTLINE(google-explicit-constructor) -struct LowPriority { LowPriority(void *) {} }; - -template -Out & dumpDispatchPriorities(Out & out, T && x, LowPriority) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return dumpDispatchPriorities(out, x, nullptr); -} - - -template -Out & dumpValue(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - return dumpDispatchPriorities<5>(out, x, nullptr); -} - - -template -Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) -{ - // Dumping string literal, printing name and demangled type is irrelevant. 
- if constexpr (std::is_same_v>>) - { - const auto name_len = strlen(name); - const auto value_len = strlen(x); - // `name` is the same as quoted `x` - if (name_len > 2 && value_len > 0 && name[0] == '"' && name[name_len - 1] == '"' - && strncmp(name + 1, x, std::min(value_len, name_len) - 1) == 0) - return out << x; - } - - out << demangle(typeid(x).name()) << " " << name << " = "; - return dumpValue(out, x) << "; "; -} - -#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" - -#define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR)); -#define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] "; -#define DUMPTAIL std::cerr << '\n'; - -#define DUMP1(V1) do { DUMPHEAD DUMPVAR(V1) DUMPTAIL } while(0) -#define DUMP2(V1, V2) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPTAIL } while(0) -#define DUMP3(V1, V2, V3) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPTAIL } while(0) -#define DUMP4(V1, V2, V3, V4) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPTAIL } while(0) -#define DUMP5(V1, V2, V3, V4, V5) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPTAIL } while(0) -#define DUMP6(V1, V2, V3, V4, V5, V6) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPTAIL } while(0) -#define DUMP7(V1, V2, V3, V4, V5, V6, V7) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPTAIL } while(0) -#define DUMP8(V1, V2, V3, V4, V5, V6, V7, V8) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPTAIL } while(0) -#define DUMP9(V1, V2, V3, V4, V5, V6, V7, V8, V9) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPVAR(V9) DUMPTAIL } while(0) - -/// https://groups.google.com/forum/#!searchin/kona-dev/variadic$20macro%7Csort:date/kona-dev/XMA-lDOqtlI/GCzdfZsD41sJ - -#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, x7, x8, x9, N, ...) N -#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1) - -#define MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) PREFIX ## NUM_ARGS -#define MAKE_VAR_MACRO_IMPL(PREFIX, NUM_ARGS) MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) -#define MAKE_VAR_MACRO(PREFIX, ...) MAKE_VAR_MACRO_IMPL(PREFIX, VA_NUM_ARGS(__VA_ARGS__)) - -#define DUMP(...) MAKE_VAR_MACRO(DUMP, __VA_ARGS__)(__VA_ARGS__) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index fd8fd8de025..9a2d02e3388 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -250,14 +250,16 @@ ALWAYS_INLINE inline char * uitoa, 1>(char * p, UnsignedOfSize //===----------------------------------------------------------===// // itoa: handle unsigned integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> +template +requires(!std::is_signed_v && std::is_integral_v) ALWAYS_INLINE inline char * itoa(U u, char * p) { return convert::uitoa(p, u); } // itoa: handle signed integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> +template +requires(std::is_signed_v && std::is_integral_v) ALWAYS_INLINE inline char * itoa(I i, char * p) { // Need "mask" to be filled with a copy of the sign bit. 
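The trailing comment above (filling a "mask" with a copy of the sign bit) refers to the usual branchless idiom for turning a signed value into its unsigned magnitude before digit generation. A self-contained sketch of that idiom, illustrative only and not the exact body of itoa.cpp:

#include <cassert>
#include <cstdint>

/// Since C++20, right-shifting a negative value is guaranteed to be an arithmetic
/// shift, so `i >> 31` copies the sign bit into every bit of `mask` (all zeros for
/// non-negative, all ones for negative). (u ^ mask) - mask then equals |i| without
/// a branch; doing the arithmetic in unsigned keeps it well-defined even for INT32_MIN.
uint32_t unsignedMagnitude(int32_t i)
{
    uint32_t u = static_cast<uint32_t>(i);
    uint32_t mask = static_cast<uint32_t>(i >> 31);
    return (u ^ mask) - mask;
}

int main()
{
    assert(unsignedMagnitude(42) == 42u);
    assert(unsignedMagnitude(-42) == 42u);
    assert(unsignedMagnitude(INT32_MIN) == 2147483648u);
}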
diff --git a/base/base/map.h b/base/base/map.h index 043d8363619..0de42ebfdf6 100644 --- a/base/base/map.h +++ b/base/base/map.h @@ -19,8 +19,8 @@ auto map(const Collection & collection, Mapper && mapper) using value_type = unqualified_t; return Collection( - boost::make_transform_iterator(std::begin(collection), std::forward(mapper)), - boost::make_transform_iterator(std::end(collection), std::forward(mapper))); + boost::make_transform_iterator(std::begin(collection), mapper), + boost::make_transform_iterator(std::end(collection), mapper)); } /** \brief Returns collection of specified container-type, @@ -33,8 +33,8 @@ auto map(const Collection & collection, Mapper && mapper) using value_type = unqualified_t; return ResultCollection( - boost::make_transform_iterator(std::begin(collection), std::forward(mapper)), - boost::make_transform_iterator(std::end(collection), std::forward(mapper))); + boost::make_transform_iterator(std::begin(collection), mapper), + boost::make_transform_iterator(std::end(collection), mapper)); } /** \brief Returns collection of specified type, @@ -45,8 +45,8 @@ template auto map(const Collection & collection, Mapper && mapper) { return ResultCollection( - boost::make_transform_iterator(std::begin(collection), std::forward(mapper)), - boost::make_transform_iterator(std::end(collection), std::forward(mapper))); + boost::make_transform_iterator(std::begin(collection), mapper), + boost::make_transform_iterator(std::end(collection), mapper)); } } diff --git a/base/base/range.h b/base/base/range.h index aacd7e433a4..c75359a44c3 100644 --- a/base/base/range.h +++ b/base/base/range.h @@ -23,12 +23,10 @@ namespace internal /// For loop adaptor which is used to iterate through a half-closed interval [begin, end). /// The parameters `begin` and `end` can have any integral or enum types. -template || std::is_enum_v) && - (std::is_integral_v || std::is_enum_v) && - (!std::is_enum_v || !std::is_enum_v || std::is_same_v), void>> +template +requires((std::is_integral_v || std::is_enum_v) && + (std::is_integral_v || std::is_enum_v) && + (!std::is_enum_v || !std::is_enum_v || std::is_same_v)) inline auto range(BeginType begin, EndType end) { if constexpr (std::is_integral_v && std::is_integral_v) @@ -50,8 +48,8 @@ inline auto range(BeginType begin, EndType end) /// For loop adaptor which is used to iterate through a half-closed interval [0, end). /// The parameter `end` can have any integral or enum type. /// The same as range(0, end). 
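As a usage note for the adaptor documented above: the single-argument form iterates the half-open interval [0, end), i.e. the same sequence as the standard bounded iota view. A small standalone illustration using the standard view (not the adaptor itself):

#include <iostream>
#include <ranges>

int main()
{
    /// Produces the same sequence as the two-argument range(3, 7): 3 4 5 6, stopping before 7.
    for (int i : std::views::iota(3, 7))
        std::cout << i << ' ';
    std::cout << '\n';
}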
-template || std::is_enum_v, void>> +template +requires(std::is_integral_v || std::is_enum_v) inline auto range(Type end) { if constexpr (std::is_integral_v) diff --git a/base/base/sleep.cpp b/base/base/sleep.cpp index 9611f8cc40f..312a5a5db0b 100644 --- a/base/base/sleep.cpp +++ b/base/base/sleep.cpp @@ -2,6 +2,7 @@ #include #include +#include #if defined(OS_DARWIN) #include @@ -34,7 +35,8 @@ void sleepForNanoseconds(uint64_t nanoseconds) constexpr auto clock_type = CLOCK_MONOTONIC; struct timespec current_time; - clock_gettime(clock_type, ¤t_time); + if (0 != clock_gettime(clock_type, ¤t_time)) + throw std::system_error(std::error_code(errno, std::system_category())); constexpr uint64_t resolution = 1'000'000'000; struct timespec finish_time = current_time; diff --git a/base/base/tests/CMakeLists.txt b/base/base/tests/CMakeLists.txt index 81db4f3622f..e69de29bb2d 100644 --- a/base/base/tests/CMakeLists.txt +++ b/base/base/tests/CMakeLists.txt @@ -1,2 +0,0 @@ -clickhouse_add_executable (dump_variable dump_variable.cpp) -target_link_libraries (dump_variable PRIVATE clickhouse_common_io) diff --git a/base/base/tests/dump_variable.cpp b/base/base/tests/dump_variable.cpp deleted file mode 100644 index 9addc298ecb..00000000000 --- a/base/base/tests/dump_variable.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -struct S1; -struct S2 {}; - -struct S3 -{ - std::set m1; -}; - -std::ostream & operator<<(std::ostream & stream, const S3 & what) -{ - stream << "S3 {m1="; - dumpValue(stream, what.m1) << "}"; - return stream; -} - -int main(int, char **) -{ - int x = 1; - - DUMP(x); - DUMP(x, 1, &x); - - DUMP(std::make_unique(1)); - DUMP(std::make_shared(1)); - - std::vector vec{1, 2, 3}; - DUMP(vec); - - auto pair = std::make_pair(1, 2); - DUMP(pair); - - auto tuple = std::make_tuple(1, 2, 3); - DUMP(tuple); - - std::map map{{1, "hello"}, {2, "world"}}; - DUMP(map); - - std::initializer_list list{"hello", "world"}; - DUMP(list); - - std::array arr{{"hello", "world"}}; - DUMP(arr); - - //DUMP([]{}); - - S1 * s = nullptr; - DUMP(s); - - DUMP(S2()); - - std::set variants = {"hello", "world"}; - DUMP(variants); - - S3 s3 {{"hello", "world"}}; - DUMP(s3); - - return 0; -} diff --git a/base/base/wide_integer.h b/base/base/wide_integer.h index ffd30460c03..f3a4dc9e6d5 100644 --- a/base/base/wide_integer.h +++ b/base/base/wide_integer.h @@ -111,7 +111,8 @@ public: constexpr explicit operator bool() const noexcept; - template , T>> + template + requires(std::is_arithmetic_v) constexpr operator T() const noexcept; constexpr operator long double() const noexcept; @@ -208,12 +209,14 @@ constexpr integer operator<<(const integer & lhs, in template constexpr integer operator>>(const integer & lhs, int n) noexcept; -template >> +template +requires(!std::is_same_v) constexpr integer operator<<(const integer & lhs, Int n) noexcept { return lhs << int(n); } -template >> +template +requires(!std::is_same_v) constexpr integer operator>>(const integer & lhs, Int n) noexcept { return lhs >> int(n); @@ -262,4 +265,3 @@ struct hash>; // NOLINTEND(*) #include "wide_integer_impl.h" - diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 0e98b6e5ee6..3787971a20e 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -1246,7 +1246,8 @@ constexpr integer::operator bool() const noexcept } template -template +template +requires(std::is_arithmetic_v) constexpr integer::operator T() const noexcept { 
static_assert(std::numeric_limits::is_integer); diff --git a/base/poco/Foundation/include/Poco/Platform.h b/base/poco/Foundation/include/Poco/Platform.h index fe45833aea6..30937d0fc91 100644 --- a/base/poco/Foundation/include/Poco/Platform.h +++ b/base/poco/Foundation/include/Poco/Platform.h @@ -120,6 +120,7 @@ #define POCO_ARCH_AARCH64 0x0f #define POCO_ARCH_ARM64 0x0f // same as POCO_ARCH_AARCH64 #define POCO_ARCH_RISCV64 0x10 +#define POCO_ARCH_LOONGARCH64 0x12 #if defined(__ALPHA) || defined(__alpha) || defined(__alpha__) || defined(_M_ALPHA) @@ -209,6 +210,9 @@ #elif defined(__riscv) && (__riscv_xlen == 64) # define POCO_ARCH POCO_ARCH_RISCV64 # define POCO_ARCH_LITTLE_ENDIAN 1 +#elif defined(__loongarch64) +# define POCO_ARCH POCO_ARCH_LOONGARCH64 +# define POCO_ARCH_LITTLE_ENDIAN 1 #endif diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 5ec05e49e3c..c992236dc95 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -11,6 +11,8 @@ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x.*|S390X.*)") set (ARCH_S390X 1) elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") set (ARCH_RISCV64 1) +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64") + set (ARCH_LOONGARCH64 1) else () message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_PROCESSOR} is not supported") endif () diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index f8ff71876c6..dfbbb66a1e9 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54486) +SET(VERSION_REVISION 54487) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 5) +SET(VERSION_MINOR 6) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 6d4b31322d168356c8b10c43b4cef157c82337ff) -SET(VERSION_DESCRIBE v24.5.1.1-testing) -SET(VERSION_STRING 24.5.1.1) +SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0) +SET(VERSION_DESCRIBE v24.6.1.1-testing) +SET(VERSION_STRING 24.6.1.1) # end of autochange diff --git a/cmake/clang_tidy.cmake b/cmake/clang_tidy.cmake index 4323c20463a..4c9331f6283 100644 --- a/cmake/clang_tidy.cmake +++ b/cmake/clang_tidy.cmake @@ -5,14 +5,14 @@ if (ENABLE_CLANG_TIDY) find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache") if (CLANG_TIDY_CACHE_PATH) - find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-17" "clang-tidy-16" "clang-tidy") + find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-18" "clang-tidy-17" "clang-tidy-16" "clang-tidy") # Why do we use ';' here? # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax. 
set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper") else () - find_program (CLANG_TIDY_PATH NAMES "clang-tidy-17" "clang-tidy-16" "clang-tidy") + find_program (CLANG_TIDY_PATH NAMES "clang-tidy-18" "clang-tidy-17" "clang-tidy-16" "clang-tidy") endif () if (CLANG_TIDY_PATH) diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 1eeb1a872bd..6bde75f8c9a 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -1,11 +1,23 @@ set (DEFAULT_LIBS "-nodefaultlibs") if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64") - execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-x86_64.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + set(system_processor "x86_64") else () - execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + set(system_processor "${CMAKE_SYSTEM_PROCESSOR}") endif () +file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/") +message(STATUS "-Bprefix: ${bprefix}") + +execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +# --print-file-name simply prints what you passed in case of nothing was resolved, so let's try one other possible option +if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a") + message(FATAL_ERROR "libclang_rt.builtins had not been found") +endif() + set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread") message(STATUS "Default libraries: ${DEFAULT_LIBS}") diff --git a/cmake/instrument.cmake b/cmake/instrument.cmake new file mode 100644 index 00000000000..bd2fb4d45fc --- /dev/null +++ b/cmake/instrument.cmake @@ -0,0 +1,20 @@ +# https://llvm.org/docs/XRay.html + +option (ENABLE_XRAY "Enable LLVM XRay" OFF) + +if (NOT ENABLE_XRAY) + message (STATUS "Not using LLVM XRay") + return() +endif() + +if (NOT (ARCH_AMD64 AND (OS_LINUX OR OS_FREEBSD))) + message (STATUS "Not using LLVM XRay, only amd64 Linux or FreeBSD are supported") + return() +endif() + +# The target clang must support xray, otherwise it should error on invalid option +set (XRAY_FLAGS "-fxray-instrument -DUSE_XRAY") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${XRAY_FLAGS}") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${XRAY_FLAGS}") + +message (STATUS "Using LLVM XRay") diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index e5ca8e296fc..4a06243243e 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -5,17 +5,15 @@ set (DEFAULT_LIBS "-nodefaultlibs") # We need builtins from Clang's RT even without libcxx - for ubsan+int128. 
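Regarding the new ENABLE_XRAY option added in cmake/instrument.cmake above: -fxray-instrument makes clang emit patchable entry/exit sleds so that function timing can be switched on at run time. A minimal standalone sketch of how such a build is typically exercised; the file name, commands and log handling below are assumptions about the usual LLVM XRay workflow, not something this patch ships:

#include <cstdio>

/// Small functions fall below XRay's default instruction threshold, so the
/// attributes force the instrumentation decision explicitly.
[[clang::xray_always_instrument]] void traced() { std::puts("traced"); }
[[clang::xray_never_instrument]] void ignored() { std::puts("ignored"); }

int main()
{
    traced();
    ignored();
}

/// Assumed typical usage with an XRay-capable clang:
///   clang++ -fxray-instrument xray_demo.cpp -o xray_demo
///   XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic" ./xray_demo
///   llvm-xray account xray-log.xray_demo.* --instr_map=./xray_demo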
# See https://bugs.llvm.org/show_bug.cgi?id=16404 -if (COMPILER_CLANG) - execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) - # Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both. - if (SANITIZE STREQUAL undefined) - string(REPLACE "builtins.a" "ubsan_standalone_cxx.a" EXTRA_BUILTINS_LIBRARY "${BUILTINS_LIBRARY}") - endif () +# Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both. +if (SANITIZE STREQUAL undefined) + string(REPLACE "builtins.a" "ubsan_standalone_cxx.a" EXTRA_BUILTINS_LIBRARY "${BUILTINS_LIBRARY}") +endif () - if (NOT EXISTS "${BUILTINS_LIBRARY}") - set (BUILTINS_LIBRARY "-lgcc") - endif () +if (NOT EXISTS "${BUILTINS_LIBRARY}") + set (BUILTINS_LIBRARY "-lgcc") endif () if (OS_ANDROID) diff --git a/cmake/linux/toolchain-loongarch64.cmake b/cmake/linux/toolchain-loongarch64.cmake new file mode 100644 index 00000000000..bf1ca261183 --- /dev/null +++ b/cmake/linux/toolchain-loongarch64.cmake @@ -0,0 +1,23 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + +set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set (CMAKE_SYSTEM_NAME "Linux") +set (CMAKE_SYSTEM_PROCESSOR "loongarch64") +set (CMAKE_C_COMPILER_TARGET "loongarch64-linux-gnu") +set (CMAKE_CXX_COMPILER_TARGET "loongarch64-linux-gnu") +set (CMAKE_ASM_COMPILER_TARGET "loongarch64-linux-gnu") + +# Adding `-mcmodel=extreme` is to handle the link error: +# relocation R_LARCH_B26 out of range: 194148892 is not in [-134217728, 134217727] +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcmodel=extreme") +set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -mcmodel=extreme") + +set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-loongarch64") + +set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-loongarch64/usr") + +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index a3523203912..08716c1196b 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -26,9 +26,7 @@ if (SANITIZE) elseif (SANITIZE STREQUAL "thread") set (TSAN_FLAGS "-fsanitize=thread") - if (COMPILER_CLANG) - set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/tsan_ignorelist.txt") - endif() + set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/tsan_ignorelist.txt") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}") @@ -44,9 +42,7 @@ if (SANITIZE) # that's why we often receive reports about UIO. The simplest way to avoid this is just set this flag here. 
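For context on the -fno-sanitize=unsigned-integer-overflow line above: unlike signed overflow, unsigned wraparound is well-defined in C++ and is used deliberately (hash mixing, ring-buffer indices, overflow checks), so the check is mostly noise. A tiny standalone example of code that is correct yet would still be reported by that sanitizer:

#include <cassert>
#include <cstdint>

int main()
{
    uint32_t x = 0xFFFFFFFFu;
    x += 1;            /// wraps to 0 by definition, yet UBSan's unsigned-integer-overflow
    assert(x == 0);    /// check would flag it, hence the flag stays disabled.
}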
set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow") endif() - if (COMPILER_CLANG) - set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/ubsan_ignorelist.txt") - endif() + set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/ubsan_ignorelist.txt") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}") diff --git a/cmake/target.cmake b/cmake/target.cmake index fb911ace7b5..d6c497955f6 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -41,10 +41,7 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_ICU OFF CACHE INTERNAL "") set (ENABLE_FASTOPS OFF CACHE INTERNAL "") elseif (OS_LINUX OR OS_ANDROID) - if (ARCH_AARCH64) - # FIXME: broken dependencies - set (ENABLE_GRPC OFF CACHE INTERNAL "") - elseif (ARCH_PPC64LE) + if (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") elseif (ARCH_RISCV64) # RISC-V support is preliminary @@ -61,6 +58,20 @@ if (CMAKE_CROSSCOMPILING) elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_RUST OFF CACHE INTERNAL "") + elseif (ARCH_LOONGARCH64) + set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") + set (ENABLE_LDAP OFF CACHE INTERNAL "") + set (OPENSSL_NO_ASM ON CACHE INTERNAL "") + set (ENABLE_JEMALLOC OFF CACHE INTERNAL "") + set (ENABLE_PARQUET OFF CACHE INTERNAL "") + set (ENABLE_GRPC OFF CACHE INTERNAL "") + set (ENABLE_HDFS OFF CACHE INTERNAL "") + set (ENABLE_MYSQL OFF CACHE INTERNAL "") + set (ENABLE_RUST OFF CACHE INTERNAL "") + set (ENABLE_LIBPQXX OFF CACHE INTERNAL "") + set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") + set (ENABLE_DWARF_PARSER OFF CACHE INTERNAL "") + set (ENABLE_BLAKE3 OFF CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 1ba3007b0f3..7aa5d4c51ce 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -1,10 +1,6 @@ # Compiler -if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") - set (COMPILER_CLANG 1) # Safe to treat AppleClang as a regular Clang, in general. -elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set (COMPILER_CLANG 1) -else () +if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") message (FATAL_ERROR "Compiler ${CMAKE_CXX_COMPILER_ID} is not supported") endif () @@ -13,34 +9,30 @@ execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") # Require minimum compiler versions -set (CLANG_MINIMUM_VERSION 16) +set (CLANG_MINIMUM_VERSION 17) set (XCODE_MINIMUM_VERSION 12.0) set (APPLE_CLANG_MINIMUM_VERSION 12.0.0) -if (COMPILER_CLANG) - if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") - # (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it. - if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG}) - message (FATAL_ERROR "Compilation with AppleClang is unsupported. Please use vanilla Clang, e.g. from Homebrew.") - endif () +if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") + # (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it. + if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG}) + message (FATAL_ERROR "Compilation with AppleClang is unsupported. Please use vanilla Clang, e.g. 
from Homebrew.") + endif () - # For a mapping between XCode / AppleClang / vanilla Clang versions, see https://en.wikipedia.org/wiki/Xcode - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${APPLE_CLANG_MINIMUM_VERSION}) - message (FATAL_ERROR "Compilation with AppleClang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${APPLE_CLANG_MINIMUM_VERSION} (Xcode ${XCODE_MINIMUM_VERSION}).") - endif () - else () - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION}) - message (FATAL_ERROR "Compilation with Clang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${CLANG_MINIMUM_VERSION}.") - endif () + # For a mapping between XCode / AppleClang / vanilla Clang versions, see https://en.wikipedia.org/wiki/Xcode + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${APPLE_CLANG_MINIMUM_VERSION}) + message (FATAL_ERROR "Compilation with AppleClang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${APPLE_CLANG_MINIMUM_VERSION} (Xcode ${XCODE_MINIMUM_VERSION}).") + endif () +else () + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION}) + message (FATAL_ERROR "Compilation with Clang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${CLANG_MINIMUM_VERSION}.") endif () endif () -# Linker - string (REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION}) list (GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR) -# Example values: `lld-10` +# Linker option (LINKER_NAME "Linker name or full path") if (LINKER_NAME MATCHES "gold") @@ -48,19 +40,15 @@ if (LINKER_NAME MATCHES "gold") endif () if (NOT LINKER_NAME) - if (COMPILER_CLANG) - if (OS_LINUX AND NOT ARCH_S390X) - find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld") - elseif (OS_DARWIN) - find_program (LLD_PATH NAMES "ld") - endif () + if (OS_LINUX AND NOT ARCH_S390X) + find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld") + elseif (OS_DARWIN) + find_program (LLD_PATH NAMES "ld") endif () if (LLD_PATH) if (OS_LINUX OR OS_DARWIN) - if (COMPILER_CLANG) - # Clang driver simply allows full linker path. - set (LINKER_NAME ${LLD_PATH}) - endif () + # Clang driver simply allows full linker path. 
+ set (LINKER_NAME ${LLD_PATH}) endif () endif() endif() @@ -82,47 +70,28 @@ else () endif () # Archiver - -if (COMPILER_CLANG) - find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar") -endif () - +find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar") if (LLVM_AR_PATH) set (CMAKE_AR "${LLVM_AR_PATH}") endif () - message(STATUS "Using archiver: ${CMAKE_AR}") # Ranlib - -if (COMPILER_CLANG) - find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib") -endif () - +find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib") if (LLVM_RANLIB_PATH) set (CMAKE_RANLIB "${LLVM_RANLIB_PATH}") endif () - message(STATUS "Using ranlib: ${CMAKE_RANLIB}") # Install Name Tool - -if (COMPILER_CLANG) - find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool") -endif () - +find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool") if (LLVM_INSTALL_NAME_TOOL_PATH) set (CMAKE_INSTALL_NAME_TOOL "${LLVM_INSTALL_NAME_TOOL_PATH}") endif () - message(STATUS "Using install-name-tool: ${CMAKE_INSTALL_NAME_TOOL}") # Objcopy - -if (COMPILER_CLANG) - find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy") -endif () - +find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy") if (OBJCOPY_PATH) message (STATUS "Using objcopy: ${OBJCOPY_PATH}") else () @@ -130,11 +99,7 @@ else () endif () # Strip - -if (COMPILER_CLANG) - find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") -endif () - +find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") if (STRIP_PATH) message (STATUS "Using strip: ${STRIP_PATH}") else () diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 455e4f09939..807d92d9077 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -15,37 +15,35 @@ if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE add_warning(frame-larger-than=65536) endif () -if (COMPILER_CLANG) - # Add some warnings that are not available even with -Wall -Wextra -Wpedantic. - # We want to get everything out of the compiler for code quality. 
- add_warning(everything) - add_warning(pedantic) - no_warning(zero-length-array) - no_warning(c++98-compat-pedantic) - no_warning(c++98-compat) - no_warning(c++20-compat) # Use constinit in C++20 without warnings - no_warning(sign-conversion) - no_warning(implicit-int-conversion) - no_warning(implicit-int-float-conversion) - no_warning(ctad-maybe-unsupported) # clang 9+, linux-only - no_warning(disabled-macro-expansion) - no_warning(documentation-unknown-command) - no_warning(double-promotion) - no_warning(exit-time-destructors) - no_warning(float-equal) - no_warning(global-constructors) - no_warning(missing-prototypes) - no_warning(missing-variable-declarations) - no_warning(padded) - no_warning(switch-enum) - no_warning(undefined-func-template) - no_warning(unused-template) - no_warning(vla) - no_warning(weak-template-vtables) - no_warning(weak-vtables) - no_warning(thread-safety-negative) # experimental flag, too many false positives - no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 - no_warning(unsafe-buffer-usage) # too aggressive - no_warning(switch-default) # conflicts with "defaults in a switch covering all enum values" - # TODO Enable conversion, sign-conversion, double-promotion warnings. -endif () +# Add some warnings that are not available even with -Wall -Wextra -Wpedantic. +# We want to get everything out of the compiler for code quality. +add_warning(everything) +add_warning(pedantic) +no_warning(zero-length-array) +no_warning(c++98-compat-pedantic) +no_warning(c++98-compat) +no_warning(c++20-compat) # Use constinit in C++20 without warnings +no_warning(sign-conversion) +no_warning(implicit-int-conversion) +no_warning(implicit-int-float-conversion) +no_warning(ctad-maybe-unsupported) # clang 9+, linux-only +no_warning(disabled-macro-expansion) +no_warning(documentation-unknown-command) +no_warning(double-promotion) +no_warning(exit-time-destructors) +no_warning(float-equal) +no_warning(global-constructors) +no_warning(missing-prototypes) +no_warning(missing-variable-declarations) +no_warning(padded) +no_warning(switch-enum) +no_warning(undefined-func-template) +no_warning(unused-template) +no_warning(vla) +no_warning(weak-template-vtables) +no_warning(weak-vtables) +no_warning(thread-safety-negative) # experimental flag, too many false positives +no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 +no_warning(unsafe-buffer-usage) # too aggressive +no_warning(switch-default) # conflicts with "defaults in a switch covering all enum values" +# TODO Enable conversion, sign-conversion, double-promotion warnings. 
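The no_warning(switch-default) entry above reflects a real conflict: -Wswitch-default wants every switch to carry a default branch, while the codebase prefers enum switches without one so that the compiler's -Wswitch diagnostics flag any enumerator added later. A small standalone example of the preferred pattern, with a toy enum that is not part of the patch:

#include <cstdint>

enum class Status : uint8_t { Ok, Error };

const char * toString(Status s)
{
    switch (s)   /// deliberately no default: adding a new Status enumerator later
    {            /// makes the compiler warn that this switch no longer covers all values
        case Status::Ok:
            return "Ok";
        case Status::Error:
            return "Error";
    }
    return "unknown";  /// unreachable for valid values, keeps -Wreturn-type quiet
}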
diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index e6c3268c57a..7372195bb0d 100644 --- a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -52,7 +52,7 @@ function(absl_cc_library) ) target_include_directories(${_NAME} - PUBLIC "${ABSL_COMMON_INCLUDE_DIRS}") + SYSTEM PUBLIC "${ABSL_COMMON_INCLUDE_DIRS}") target_compile_options(${_NAME} PRIVATE ${ABSL_CC_LIB_COPTS}) target_compile_definitions(${_NAME} PUBLIC ${ABSL_CC_LIB_DEFINES}) @@ -61,7 +61,7 @@ function(absl_cc_library) # Generating header-only library add_library(${_NAME} INTERFACE) target_include_directories(${_NAME} - INTERFACE "${ABSL_COMMON_INCLUDE_DIRS}") + SYSTEM INTERFACE "${ABSL_COMMON_INCLUDE_DIRS}") target_link_libraries(${_NAME} INTERFACE diff --git a/contrib/arrow b/contrib/arrow index 8f36d71d185..5cfccd8ea65 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb +Subproject commit 5cfccd8ea65f33d4517e7409815d761c7650b45d diff --git a/contrib/aws b/contrib/aws index 2e12d7c6daf..deeaa9e7c5f 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be +Subproject commit deeaa9e7c5fe690e3dacc4005d7ecfa7a66a32bb diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 2c60fc0e552..7191393533b 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -140,6 +140,12 @@ elseif (ARCH_RISCV64) "${LIBRARY_DIR}/libs/context/src/asm/make_riscv64_sysv_elf_gas.S" "${LIBRARY_DIR}/libs/context/src/asm/ontop_riscv64_sysv_elf_gas.S" ) +elseif (ARCH_LOONGARCH64) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_loongarch64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_loongarch64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_loongarch64_sysv_elf_gas.S" + ) elseif(OS_DARWIN) set (SRCS_CONTEXT ${SRCS_CONTEXT} "${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_macho_gas.S" diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index e76268592ee..c07e9e6925b 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -81,9 +81,7 @@ set (CAPNPC_SRCS add_library(_capnpc ${CAPNPC_SRCS}) target_link_libraries(_capnpc PUBLIC _capnp) -if (COMPILER_CLANG) - set (CAPNP_PRIVATE_CXX_FLAGS -fno-char8_t) -endif () +set (CAPNP_PRIVATE_CXX_FLAGS -fno-char8_t) target_compile_options(_kj PRIVATE ${CAPNP_PRIVATE_CXX_FLAGS}) target_compile_options(_capnp PRIVATE ${CAPNP_PRIVATE_CXX_FLAGS}) diff --git a/contrib/datasketches-cpp b/contrib/datasketches-cpp index c3abaaefe5f..76edd74f5db 160000 --- a/contrib/datasketches-cpp +++ b/contrib/datasketches-cpp @@ -1 +1 @@ -Subproject commit c3abaaefe5fa400eed99e082af07c1b61a7144db +Subproject commit 76edd74f5db286b672c170a8ded4ce39b3a8800f diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index b8b5f5580c4..1c0bf41ff78 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -31,3 +31,123 @@ add_library(_ch_contrib_grpc INTERFACE) target_link_libraries(_ch_contrib_grpc INTERFACE ${gRPC_LIBRARIES}) target_include_directories(_ch_contrib_grpc SYSTEM INTERFACE ${gRPC_INCLUDE_DIRS}) add_library(ch_contrib::grpc ALIAS _ch_contrib_grpc) + +# Here we are trying to build a binary tool grpc_cpp_plugin in case of cross-compilation. 
+# We need this file only during compilation process itself so we need it for our "host" +# platform, not "target" platform. +# If we are doing normal compilation this executable will be produced in grpc.cmake. +# +# All code inside this block looks so weird because cmake fundametally doesn't +# support different toolchains for different targets. So we just running it +# in "bash script" fashion with different (actually without, i.e. default) toolchain. +# +# FIXME Sorry, I don't know cmake. +if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME + OR NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR) + + # First we need to build openssl for host plaform + set(OPENSSL_BUILD_DIR "${_gRPC_BINARY_DIR}/build_openssl") + + set(OPENSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") + + execute_process( + COMMAND mkdir -p ${OPENSSL_BUILD_DIR} + COMMAND_ECHO STDOUT + ) + + if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") + set (HOST_ARCH_AMD64 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") + set (HOST_ARCH_AARCH64 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(powerpc64le.*|ppc64le.*|PPC64LE.*)") + set (HOST_ARCH_PPC64LE 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(s390x.*|S390X.*)") + set (HOST_ARCH_S390X 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "riscv64") + set (HOST_ARCH_RISCV64 1) + endif () + + if (CMAKE_HOST_SYSTEM_NAME MATCHES "Linux") + set (HOST_OS_LINUX 1) + elseif (CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin") + set (HOST_OS_DARWIN 1) + endif () + execute_process( + COMMAND ${CMAKE_COMMAND} + "-G${CMAKE_GENERATOR}" + "-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DARCH_AMD64=${HOST_ARCH_AMD64}" + "-DARCH_AARCH64=${HOST_ARCH_AARCH64}" + "-DARCH_PPC64LE=${HOST_ARCH_PPC64LE}" + "-DARCH_S390X=${HOST_ARCH_S390X}" + "-DARCH_RISCV64=${HOST_ARCH_RISCV64}" + "-DOS_DARWIN=${HOST_OS_DARWIN}" + "-DOPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION=1" + "-DClickHouse_BINARY_DIR=${ClickHouse_BINARY_DIR}" + "-DClickHouse_SOURCE_DIR=${ClickHouse_SOURCE_DIR}" + "${OPENSSL_SOURCE_DIR}" + WORKING_DIRECTORY "${OPENSSL_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + execute_process( + COMMAND ${CMAKE_COMMAND} --build "${OPENSSL_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + execute_process( + COMMAND ${CMAKE_COMMAND} --install "${OPENSSL_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + # It's not important on which file we depend, we just want to specify right order + add_library(openssl_for_grpc STATIC IMPORTED GLOBAL) + set_target_properties (openssl_for_grpc PROPERTIES IMPORTED_LOCATION "${OPENSSL_BUILD_DIR}/libssl.a") + add_dependencies(openssl_for_grpc "${OPENSSL_BUILD_DIR}/libssl.a") + + # Okay, openssl ready, let's build grpc_cpp_plugin + set (GRPC_CPP_PLUGIN_BUILD_DIR "${_gRPC_BINARY_DIR}/build") + + execute_process( + COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} + COMMAND_ECHO STDOUT + ) + + set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") + set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf") + set(re2_source_dir "${ClickHouse_SOURCE_DIR}/contrib/re2") + set(ssl_source_dir "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") + set(zlib_source_dir "${ClickHouse_SOURCE_DIR}/contrib/zlib-ng") + # For some reason config exists only in this directory + set(zlib_config_source_dir "${ClickHouse_BINARY_DIR}/contrib/zlib-ng-cmake") + set(cares_source_dir "${ClickHouse_SOURCE_DIR}/contrib/c-ares") + + execute_process( 
+ COMMAND ${CMAKE_COMMAND} + "-G${CMAKE_GENERATOR}" + "-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DABSL_ROOT_DIR=${abseil_source_dir}" + "-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${zlib_config_source_dir}" + "-DgRPC_INSTALL=0" + "-DABSL_ENABLE_INSTALL=1" + "-DPROTOBUF_ROOT_DIR=${protobuf_source_dir}" + "-DRE2_ROOT_DIR=${re2_source_dir}" + "-DCARES_ROOT_DIR=${cares_source_dir}" + "-DOPENSSL_ROOT_DIR=${OPENSSL_BUILD_DIR}" + "-DOPENSSL_INCLUDE_DIR=${OPENSSL_BUILD_DIR}/include" + "-DZLIB_ROOT_DIR=${zlib_source_dir}" + "-DgRPC_SSL_PROVIDER=package" + "${_gRPC_SOURCE_DIR}" + WORKING_DIRECTORY "${GRPC_CPP_PLUGIN_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + execute_process( + COMMAND ${CMAKE_COMMAND} --build "${GRPC_CPP_PLUGIN_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + add_executable(grpc_cpp_plugin IMPORTED GLOBAL) + set_target_properties (grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") + add_dependencies(grpc_cpp_plugin "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") + add_dependencies(grpc_cpp_plugin openssl_for_grpc) +endif() diff --git a/contrib/grpc-cmake/grpc.cmake b/contrib/grpc-cmake/grpc.cmake index c2488539211..39645938f14 100644 --- a/contrib/grpc-cmake/grpc.cmake +++ b/contrib/grpc-cmake/grpc.cmake @@ -1829,6 +1829,8 @@ target_link_libraries(grpc_plugin_support ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} ) +if (CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME + AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR) add_executable(grpc_cpp_plugin ${_gRPC_SOURCE_DIR}/src/compiler/cpp_plugin.cc @@ -1852,3 +1854,5 @@ target_link_libraries(grpc_cpp_plugin ${_gRPC_ALLTARGETS_LIBRARIES} grpc_plugin_support ) + +endif() diff --git a/contrib/libbcrypt-cmake/CMakeLists.txt b/contrib/libbcrypt-cmake/CMakeLists.txt index d40d7f9195e..9e97f0af493 100644 --- a/contrib/libbcrypt-cmake/CMakeLists.txt +++ b/contrib/libbcrypt-cmake/CMakeLists.txt @@ -7,7 +7,7 @@ endif() set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libbcrypt") -set(SRCS +set(SRCS "${LIBRARY_DIR}/bcrypt.c" "${LIBRARY_DIR}/crypt_blowfish/crypt_blowfish.c" "${LIBRARY_DIR}/crypt_blowfish/crypt_gensalt.c" @@ -16,4 +16,13 @@ set(SRCS add_library(_bcrypt ${SRCS}) target_include_directories(_bcrypt SYSTEM PUBLIC "${LIBRARY_DIR}") +# Avoid conflicts for crypt_r on FreeBSD [1]: +# +# - char *crypt_r(__const char *key, __const char *setting, void *data); +# - char *crypt_r(const char *, const char *, struct crypt_data *); +# +# [1]: https://github.com/freebsd/freebsd-src/commit/5f521d7ba72145092ea23ff6081d8791ad6c1f9d +# +# NOTE: ow-crypt.h is unsed only internally, so PRIVATE is enough +target_compile_definitions(_bcrypt PRIVATE -D__SKIP_GNU) add_library(ch_contrib::bcrypt ALIAS _bcrypt) diff --git a/contrib/librdkafka-cmake/config.h.in b/contrib/librdkafka-cmake/config.h.in index 52ae70aeea8..f6ec3bc0e79 100644 --- a/contrib/librdkafka-cmake/config.h.in +++ b/contrib/librdkafka-cmake/config.h.in @@ -66,7 +66,7 @@ #cmakedefine WITH_SASL_OAUTHBEARER 1 #cmakedefine WITH_SASL_CYRUS 1 // crc32chw -#if !defined(__PPC__) && !defined(__riscv) && !defined(__aarch64__) && !defined(__s390x__) +#if !defined(__PPC__) && !defined(__riscv) && !defined(__aarch64__) && !defined(__s390x__) && !defined(__loongarch64) #define WITH_CRC32C_HW 1 #endif // regex diff --git a/contrib/libssh-cmake/CMakeLists.txt b/contrib/libssh-cmake/CMakeLists.txt index bd051195864..4cc3e2a1831 100644 --- a/contrib/libssh-cmake/CMakeLists.txt +++ 
b/contrib/libssh-cmake/CMakeLists.txt @@ -110,6 +110,8 @@ if (OS_LINUX) target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/s390x") elseif (ARCH_RISCV64) target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/riscv64") + elseif (ARCH_LOONGARCH64) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/loongarch64") else () message(FATAL_ERROR "Platform is not supported") endif () diff --git a/contrib/libssh-cmake/linux/loongarch64/config.h b/contrib/libssh-cmake/linux/loongarch64/config.h new file mode 100644 index 00000000000..aa684ca29a3 --- /dev/null +++ b/contrib/libssh-cmake/linux/loongarch64/config.h @@ -0,0 +1,287 @@ +/* Name of package */ +#define PACKAGE "libssh" + +/* Version number of package */ +#define VERSION "0.9.7" + +#define SYSCONFDIR "etc" +#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/loongarch64" +#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse" + +/* Global bind configuration file path */ +#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config" + +/* Global client configuration file path */ +#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config" + +/************************** HEADER FILES *************************/ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ARGP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_VALGRIND_VALGRIND_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PTY_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_UTMP_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_UTIL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBUTIL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_TERMIOS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_AES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WSPIAPI_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_OPENSSL_BLOWFISH_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_DES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_ECDH_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_EC_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_ECDSA_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have eliptic curve cryptography in openssl */ +#define HAVE_OPENSSL_ECC 1 + +/* Define to 1 if you have eliptic curve cryptography in gcrypt */ +/* #undef HAVE_GCRYPT_ECC */ + +/* Define to 1 if you have eliptic curve cryptography */ +#define HAVE_ECC 1 + +/* Define to 1 if you have DSA */ +/* #undef HAVE_DSA */ + +/* Define to 1 if you have gl_flags as a glob_t sturct member */ +#define HAVE_GLOB_GL_FLAGS_MEMBER 1 + +/* Define to 1 if you have OpenSSL with Ed25519 support */ +#define HAVE_OPENSSL_ED25519 1 + +/* Define to 1 if you have OpenSSL with X25519 support */ +#define HAVE_OPENSSL_X25519 1 + +/*************************** FUNCTIONS ***************************/ + +/* Define to 1 if you have the `EVP_aes128_ctr' function. */ +#define HAVE_OPENSSL_EVP_AES_CTR 1 + +/* Define to 1 if you have the `EVP_aes128_cbc' function. */ +#define HAVE_OPENSSL_EVP_AES_CBC 1 + +/* Define to 1 if you have the `EVP_aes128_gcm' function. */ +/* #undef HAVE_OPENSSL_EVP_AES_GCM */ + +/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */ +#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1 + +/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */ +#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1 + +/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */ +#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1 + +/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */ +/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */ + +/* Define to 1 if you have the `FIPS_mode' function. */ +#if USE_BORINGSSL +#define HAVE_OPENSSL_FIPS_MODE 1 +#endif + +/* Define to 1 if you have the `EVP_DigestSign' function. */ +#define HAVE_OPENSSL_EVP_DIGESTSIGN 1 + +/* Define to 1 if you have the `EVP_DigestVerify' function. */ +#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1 + +/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */ +/* #undef HAVE_OPENSSL_IA32CAP_LOC */ + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `_snprintf' function. */ +/* #undef HAVE__SNPRINTF */ + +/* Define to 1 if you have the `_snprintf_s' function. */ +/* #undef HAVE__SNPRINTF_S */ + +/* Define to 1 if you have the `vsnprintf' function. */ +#define HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `_vsnprintf' function. */ +/* #undef HAVE__VSNPRINTF */ + +/* Define to 1 if you have the `_vsnprintf_s' function. */ +/* #undef HAVE__VSNPRINTF_S */ + +/* Define to 1 if you have the `isblank' function. */ +#define HAVE_ISBLANK 1 + +/* Define to 1 if you have the `strncpy' function. */ +#define HAVE_STRNCPY 1 + +/* Define to 1 if you have the `strndup' function. */ +#define HAVE_STRNDUP 1 + +/* Define to 1 if you have the `cfmakeraw' function. */ +/* #undef HAVE_CFMAKERAW */ + +/* Define to 1 if you have the `getaddrinfo' function. */ +#define HAVE_GETADDRINFO 1 + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `clock_gettime' function. */ +/* #undef HAVE_CLOCK_GETTIME */ + +/* Define to 1 if you have the `ntohll' function. */ +/* #undef HAVE_NTOHLL */ + +/* Define to 1 if you have the `htonll' function. */ +/* #undef HAVE_HTONLL */ + +/* Define to 1 if you have the `strtoull' function. */ +#define HAVE_STRTOULL 1 + +/* Define to 1 if you have the `__strtoull' function. */ +/* #undef HAVE___STRTOULL */ + +/* Define to 1 if you have the `_strtoui64' function. 
*/ +/* #undef HAVE__STRTOUI64 */ + +/* Define to 1 if you have the `glob' function. */ +#define HAVE_GLOB 1 + +/* Define to 1 if you have the `explicit_bzero' function. */ +/* #undef HAVE_EXPLICIT_BZERO 1 */ + +/* Define to 1 if you have the `memset_s' function. */ +/* #undef HAVE_MEMSET_S */ + +/* Define to 1 if you have the `SecureZeroMemory' function. */ +/* #undef HAVE_SECURE_ZERO_MEMORY */ + +/* Define to 1 if you have the `cmocka_set_test_filter' function. */ +/* #undef HAVE_CMOCKA_SET_TEST_FILTER */ + +/*************************** LIBRARIES ***************************/ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +#define HAVE_LIBCRYPTO 1 + +/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */ +/* #undef HAVE_LIBGCRYPT */ + +/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `pthread' library (-lpthread). */ +#define HAVE_PTHREAD 1 + +/* Define to 1 if you have the `cmocka' library (-lcmocka). */ +/* #undef HAVE_CMOCKA */ + +/**************************** OPTIONS ****************************/ + +#define HAVE_GCC_THREAD_LOCAL_STORAGE 1 +/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */ + +#define HAVE_FALLTHROUGH_ATTRIBUTE 1 +#define HAVE_UNUSED_ATTRIBUTE 1 + +#define HAVE_CONSTRUCTOR_ATTRIBUTE 1 +#define HAVE_DESTRUCTOR_ATTRIBUTE 1 + +#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1 + +#define HAVE_COMPILER__FUNC__ 1 +#define HAVE_COMPILER__FUNCTION__ 1 + +/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */ + +/* Define to 1 if you want to enable GSSAPI */ +/* #undef WITH_GSSAPI */ + +/* Define to 1 if you want to enable ZLIB */ +/* #undef WITH_ZLIB */ + +/* Define to 1 if you want to enable SFTP */ +/* #undef WITH_SFTP */ + +/* Define to 1 if you want to enable server support */ +#define WITH_SERVER 1 + +/* Define to 1 if you want to enable DH group exchange algorithms */ +/* #undef WITH_GEX */ + +/* Define to 1 if you want to enable blowfish cipher support */ +/* #undef WITH_BLOWFISH_CIPHER */ + +/* Define to 1 if you want to enable debug output for crypto functions */ +/* #undef DEBUG_CRYPTO */ + +/* Define to 1 if you want to enable debug output for packet functions */ +/* #undef DEBUG_PACKET */ + +/* Define to 1 if you want to enable pcap output support (experimental) */ +/* #undef WITH_PCAP */ + +/* Define to 1 if you want to enable calltrace debug output */ +/* #undef DEBUG_CALLTRACE */ + +/* Define to 1 if you want to enable NaCl support */ +/* #undef WITH_NACL */ + +/*************************** ENDIAN *****************************/ + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). 
*/ +/* #undef WORDS_BIGENDIAN */ diff --git a/contrib/libunwind b/contrib/libunwind index 40d8eadf96b..d6a01c46327 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 40d8eadf96b127d9b22d53ce7a4fc52aaedea965 +Subproject commit d6a01c46327e56fd86beb8aaa31591fcd9a6b7df diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 8f3cd8bd07b..37a2f29afcf 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -31,7 +31,9 @@ add_library(unwind ${LIBUNWIND_SOURCES}) set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) -target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) +target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1) +# NOTE: from this macros sizeof(unw_context_t)/sizeof(unw_cursor_t) is depends, so it should be set always +target_compile_definitions(unwind PUBLIC -D_LIBUNWIND_IS_NATIVE_ONLY) # We should enable optimizations (otherwise it will be too slow in debug) # and disable sanitizers (otherwise infinite loop may happen) diff --git a/contrib/lz4 b/contrib/lz4 index ce45a9dbdb0..145f3804ca5 160000 --- a/contrib/lz4 +++ b/contrib/lz4 @@ -1 +1 @@ -Subproject commit ce45a9dbdb059511a3e9576b19db3e7f1a4f172e +Subproject commit 145f3804ca5ef5482cda0f2a4f6a2d04ba57f965 diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index 021c88bcb04..85de0340996 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -32,9 +32,16 @@ set(OPENSSLDIR "/etc/ssl" CACHE PATH "Set the default openssl directory") set(OPENSSL_ENGINESDIR "/usr/local/lib/engines-3" CACHE PATH "Set the default openssl directory for engines") set(OPENSSL_MODULESDIR "/usr/local/lib/ossl-modules" CACHE PATH "Set the default openssl directory for modules") -add_definitions(-DOPENSSL_NO_KTLS -DOPENSSLDIR="${OPENSSLDIR}" -DENGINESDIR="${OPENSSL_ENGINESDIR}" -DMODULESDIR="${OPENSSL_MODULESDIR}" -DOPENSSL_USE_NODELETE -DOPENSSL_PIC) -target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") -target_compile_options(global-group INTERFACE "-Wno-poison-system-directories") +# special type of build during cross-compilation +if(OPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION) + add_definitions(-DOPENSSL_NO_KTLS -DOPENSSLDIR="\\\"${OPENSSLDIR}\\\"" -DENGINESDIR="\\\"${OPENSSL_ENGINESDIR}\\\"" -DMODULESDIR="\\\"${OPENSSL_MODULESDIR}\\\"" -DOPENSSL_USE_NODELETE -DOPENSSL_PIC) + add_compile_options("-Wno-deprecated-declarations") + add_compile_options("-Wno-poison-system-directories") +else() + add_definitions(-DOPENSSL_NO_KTLS -DOPENSSLDIR="${OPENSSLDIR}" -DENGINESDIR="${OPENSSL_ENGINESDIR}" -DMODULESDIR="${OPENSSL_MODULESDIR}" -DOPENSSL_USE_NODELETE -DOPENSSL_PIC) + target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") + target_compile_options(global-group INTERFACE "-Wno-poison-system-directories") +endif() if(ARCH_AMD64) if(OS_DARWIN) @@ -61,6 +68,9 @@ elseif(ARCH_S390X) elseif(ARCH_RISCV64) set(PLATFORM_DIRECTORY linux_riscv64) add_definitions(-DOPENSSL_CPUID_OBJ -DL_ENDIAN) +elseif(ARCH_LOONGARCH64) + set(PLATFORM_DIRECTORY linux_loongarch64) + add_definitions(-DOPENSSL_CPUID_OBJ -DL_ENDIAN) endif() file(STRINGS "${PLATFORM_DIRECTORY}/include/openssl/opensslv.h" OPENSSL_VERSION_STR @@ -91,12 +101,10 @@ set(LIB_SOVERSION ${VERSION_MAJOR}) enable_language(ASM) -if(COMPILER_CLANG) 
- add_definitions(-Wno-unused-command-line-argument) - # Note that s390x build uses mold linker - if(NOT ARCH_S390X) - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") # only relevant for -DENABLE_OPENSSL_DYNAMIC=1 - endif() +add_definitions(-Wno-unused-command-line-argument) +# Note that s390x build uses mold linker +if(NOT ARCH_S390X) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") # only relevant for -DENABLE_OPENSSL_DYNAMIC=1 endif() if(ARCH_AMD64) @@ -206,6 +214,13 @@ elseif(ARCH_RISCV64) perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/riscv64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/riscv64cpuid.S) perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-riscv64-zkn.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-riscv64-zkn.S) perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/ghash-riscv64.pl ${OPENSSL_BINARY_DIR}/crypto/modes/ghash-riscv64.S) +elseif(ARCH_LOONGARCH64) + macro(perl_generate_asm FILE_IN FILE_OUT) + add_custom_command(OUTPUT ${FILE_OUT} + COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) + endmacro() + + perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/loongarch64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/loongarch64cpuid.S) endif() set(CRYPTO_SRC @@ -1326,6 +1341,24 @@ elseif(ARCH_RISCV64) ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c ${OPENSSL_SOURCE_DIR}/crypto/riscvcap.c ) +elseif(ARCH_LOONGARCH64) + set(CRYPTO_SRC ${CRYPTO_SRC} + ${OPENSSL_BINARY_DIR}/crypto/loongarch64cpuid.S + ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c + ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c + ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_dgst.c + ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c + ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c + ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c + ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c + ${OPENSSL_SOURCE_DIR}/crypto/loongarchcap.c + ) endif() set(SSL_SRC @@ -1447,4 +1480,9 @@ target_link_libraries(ssl crypto) add_library(OpenSSL::Crypto ALIAS crypto) add_library(OpenSSL::SSL ALIAS ssl) -install(FILES openssl.conf fipsmodule.conf DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) +if(OPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION) + install(DIRECTORY "${PLATFORM_DIRECTORY}/include" DESTINATION "${CMAKE_BINARY_DIR}") + install(DIRECTORY "${OPENSSL_SOURCE_DIR}/include" DESTINATION "${CMAKE_BINARY_DIR}") +else() + install(FILES openssl.conf fipsmodule.conf DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) +endif() diff --git a/contrib/openssl-cmake/linux_loongarch64/include/crypto/bn_conf.h b/contrib/openssl-cmake/linux_loongarch64/include/crypto/bn_conf.h new file mode 100644 index 00000000000..0347a6ddc06 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/crypto/bn_conf.h @@ -0,0 +1,29 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from include/crypto/bn_conf.h.in */ +/* + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OSSL_CRYPTO_BN_CONF_H +# define OSSL_CRYPTO_BN_CONF_H +# pragma once + +/* + * The contents of this file are not used in the UEFI build, as + * both 32-bit and 64-bit builds are supported from a single run + * of the Configure script. + */ + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/crypto/dso_conf.h b/contrib/openssl-cmake/linux_loongarch64/include/crypto/dso_conf.h new file mode 100644 index 00000000000..795dfa0f1a6 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/crypto/dso_conf.h @@ -0,0 +1,19 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from include/crypto/dso_conf.h.in */ +/* + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OSSL_CRYPTO_DSO_CONF_H +# define OSSL_CRYPTO_DSO_CONF_H +# pragma once + +# define DSO_DLFCN +# define HAVE_DLFCN_H +# define DSO_EXTENSION ".so" +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/internal/param_names.h b/contrib/openssl-cmake/linux_loongarch64/include/internal/param_names.h new file mode 100644 index 00000000000..e721d071617 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/internal/param_names.h @@ -0,0 +1,376 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/internal/param_names.h.in + * + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +int ossl_param_find_pidx(const char *s); + +/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ +#define NUM_PIDX 290 + +#define PIDX_ALG_PARAM_CIPHER 0 +#define PIDX_ALG_PARAM_DIGEST 1 +#define PIDX_ALG_PARAM_ENGINE 2 +#define PIDX_ALG_PARAM_MAC 3 +#define PIDX_ALG_PARAM_PROPERTIES 4 +#define PIDX_ASYM_CIPHER_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_ENGINE PIDX_PKEY_PARAM_ENGINE +#define PIDX_ASYM_CIPHER_PARAM_IMPLICIT_REJECTION 5 +#define PIDX_ASYM_CIPHER_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_MGF1_DIGEST_PROPS PIDX_PKEY_PARAM_MGF1_PROPERTIES +#define PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS 6 +#define PIDX_ASYM_CIPHER_PARAM_OAEP_LABEL 7 +#define PIDX_ASYM_CIPHER_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE +#define PIDX_ASYM_CIPHER_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION 8 +#define PIDX_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION 9 +#define PIDX_CAPABILITY_TLS_GROUP_ALG 10 +#define PIDX_CAPABILITY_TLS_GROUP_ID 11 +#define PIDX_CAPABILITY_TLS_GROUP_IS_KEM 12 +#define PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS 13 +#define PIDX_CAPABILITY_TLS_GROUP_MAX_TLS 14 +#define PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS 15 +#define PIDX_CAPABILITY_TLS_GROUP_MIN_TLS 16 +#define PIDX_CAPABILITY_TLS_GROUP_NAME 17 +#define PIDX_CAPABILITY_TLS_GROUP_NAME_INTERNAL 18 +#define PIDX_CAPABILITY_TLS_GROUP_SECURITY_BITS 19 +#define PIDX_CAPABILITY_TLS_SIGALG_CODE_POINT 20 +#define PIDX_CAPABILITY_TLS_SIGALG_HASH_NAME 21 +#define PIDX_CAPABILITY_TLS_SIGALG_HASH_OID 22 +#define PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME 23 +#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE 24 +#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID 25 +#define PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS 14 +#define PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS 16 +#define PIDX_CAPABILITY_TLS_SIGALG_NAME 26 +#define PIDX_CAPABILITY_TLS_SIGALG_OID 27 +#define PIDX_CAPABILITY_TLS_SIGALG_SECURITY_BITS 28 +#define PIDX_CAPABILITY_TLS_SIGALG_SIG_NAME 29 +#define PIDX_CAPABILITY_TLS_SIGALG_SIG_OID 30 +#define PIDX_CIPHER_PARAM_AEAD 31 +#define PIDX_CIPHER_PARAM_AEAD_IVLEN PIDX_CIPHER_PARAM_IVLEN +#define PIDX_CIPHER_PARAM_AEAD_MAC_KEY 32 +#define PIDX_CIPHER_PARAM_AEAD_TAG 33 +#define PIDX_CIPHER_PARAM_AEAD_TAGLEN 34 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_AAD 35 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_AAD_PAD 36 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN 37 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_IV_FIXED 38 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV 39 +#define PIDX_CIPHER_PARAM_ALGORITHM_ID_PARAMS 40 +#define PIDX_CIPHER_PARAM_BLOCK_SIZE 41 +#define PIDX_CIPHER_PARAM_CTS 42 +#define PIDX_CIPHER_PARAM_CTS_MODE 43 +#define PIDX_CIPHER_PARAM_CUSTOM_IV 44 +#define PIDX_CIPHER_PARAM_HAS_RAND_KEY 45 +#define PIDX_CIPHER_PARAM_IV 46 +#define PIDX_CIPHER_PARAM_IVLEN 47 +#define PIDX_CIPHER_PARAM_KEYLEN 48 +#define PIDX_CIPHER_PARAM_MODE 49 +#define PIDX_CIPHER_PARAM_NUM 50 +#define PIDX_CIPHER_PARAM_PADDING 51 +#define PIDX_CIPHER_PARAM_RANDOM_KEY 52 +#define PIDX_CIPHER_PARAM_RC2_KEYBITS 53 +#define PIDX_CIPHER_PARAM_ROUNDS 54 +#define PIDX_CIPHER_PARAM_SPEED 55 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 56 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 57 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 58 +#define 
PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 59 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 60 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 61 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 62 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 63 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 64 +#define PIDX_CIPHER_PARAM_TLS_MAC 65 +#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 66 +#define PIDX_CIPHER_PARAM_TLS_VERSION 67 +#define PIDX_CIPHER_PARAM_UPDATED_IV 68 +#define PIDX_CIPHER_PARAM_USE_BITS 69 +#define PIDX_CIPHER_PARAM_XTS_STANDARD 70 +#define PIDX_DECODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_DIGEST_PARAM_ALGID_ABSENT 71 +#define PIDX_DIGEST_PARAM_BLOCK_SIZE 41 +#define PIDX_DIGEST_PARAM_MICALG 72 +#define PIDX_DIGEST_PARAM_PAD_TYPE 73 +#define PIDX_DIGEST_PARAM_SIZE 74 +#define PIDX_DIGEST_PARAM_SSL3_MS 75 +#define PIDX_DIGEST_PARAM_XOF 76 +#define PIDX_DIGEST_PARAM_XOFLEN 77 +#define PIDX_DRBG_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_DRBG_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 78 +#define PIDX_DRBG_PARAM_MAC PIDX_ALG_PARAM_MAC +#define PIDX_DRBG_PARAM_MAX_ADINLEN 79 +#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 80 +#define PIDX_DRBG_PARAM_MAX_LENGTH 81 +#define PIDX_DRBG_PARAM_MAX_NONCELEN 82 +#define PIDX_DRBG_PARAM_MAX_PERSLEN 83 +#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 84 +#define PIDX_DRBG_PARAM_MIN_LENGTH 85 +#define PIDX_DRBG_PARAM_MIN_NONCELEN 86 +#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 87 +#define PIDX_DRBG_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_DRBG_PARAM_RANDOM_DATA 88 +#define PIDX_DRBG_PARAM_RESEED_COUNTER 89 +#define PIDX_DRBG_PARAM_RESEED_REQUESTS 90 +#define PIDX_DRBG_PARAM_RESEED_TIME 91 +#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 92 +#define PIDX_DRBG_PARAM_SIZE 74 +#define PIDX_DRBG_PARAM_USE_DF 93 +#define PIDX_ENCODER_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 94 +#define PIDX_ENCODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 95 +#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 96 +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 97 +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 98 +#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 99 +#define PIDX_EXCHANGE_PARAM_KDF_TYPE 100 +#define PIDX_EXCHANGE_PARAM_KDF_UKM 101 +#define PIDX_EXCHANGE_PARAM_PAD 102 +#define PIDX_GEN_PARAM_ITERATION 103 +#define PIDX_GEN_PARAM_POTENTIAL 104 +#define PIDX_KDF_PARAM_ARGON2_AD 105 +#define PIDX_KDF_PARAM_ARGON2_LANES 106 +#define PIDX_KDF_PARAM_ARGON2_MEMCOST 107 +#define PIDX_KDF_PARAM_ARGON2_VERSION 108 +#define PIDX_KDF_PARAM_CEK_ALG 109 +#define PIDX_KDF_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_KDF_PARAM_CONSTANT 110 +#define PIDX_KDF_PARAM_DATA 111 +#define PIDX_KDF_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_KDF_PARAM_EARLY_CLEAN 112 +#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 113 +#define PIDX_KDF_PARAM_HMACDRBG_NONCE 114 +#define PIDX_KDF_PARAM_INFO 115 +#define PIDX_KDF_PARAM_ITER 116 +#define PIDX_KDF_PARAM_KBKDF_R 117 +#define PIDX_KDF_PARAM_KBKDF_USE_L 118 +#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 119 +#define PIDX_KDF_PARAM_KEY 120 +#define PIDX_KDF_PARAM_LABEL 121 +#define PIDX_KDF_PARAM_MAC PIDX_ALG_PARAM_MAC +#define PIDX_KDF_PARAM_MAC_SIZE 122 +#define PIDX_KDF_PARAM_MODE 49 +#define PIDX_KDF_PARAM_PASSWORD 123 +#define PIDX_KDF_PARAM_PKCS12_ID 124 +#define PIDX_KDF_PARAM_PKCS5 125 +#define PIDX_KDF_PARAM_PREFIX 126 +#define PIDX_KDF_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define 
PIDX_KDF_PARAM_SALT 127 +#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 128 +#define PIDX_KDF_PARAM_SCRYPT_N 129 +#define PIDX_KDF_PARAM_SCRYPT_P 130 +#define PIDX_KDF_PARAM_SCRYPT_R 117 +#define PIDX_KDF_PARAM_SECRET 131 +#define PIDX_KDF_PARAM_SEED 132 +#define PIDX_KDF_PARAM_SIZE 74 +#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 133 +#define PIDX_KDF_PARAM_SSHKDF_TYPE 134 +#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 135 +#define PIDX_KDF_PARAM_THREADS 136 +#define PIDX_KDF_PARAM_UKM 137 +#define PIDX_KDF_PARAM_X942_ACVPINFO 138 +#define PIDX_KDF_PARAM_X942_PARTYUINFO 139 +#define PIDX_KDF_PARAM_X942_PARTYVINFO 140 +#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 141 +#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 142 +#define PIDX_KDF_PARAM_X942_USE_KEYBITS 143 +#define PIDX_KEM_PARAM_IKME 144 +#define PIDX_KEM_PARAM_OPERATION 145 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 146 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 147 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 148 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE 49 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 149 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 150 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 151 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 152 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 153 +#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 154 +#define PIDX_MAC_PARAM_BLOCK_SIZE 155 +#define PIDX_MAC_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_MAC_PARAM_CUSTOM 156 +#define PIDX_MAC_PARAM_C_ROUNDS 157 +#define PIDX_MAC_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_MAC_PARAM_DIGEST_NOINIT 158 +#define PIDX_MAC_PARAM_DIGEST_ONESHOT 159 +#define PIDX_MAC_PARAM_D_ROUNDS 160 +#define PIDX_MAC_PARAM_IV 46 +#define PIDX_MAC_PARAM_KEY 120 +#define PIDX_MAC_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_MAC_PARAM_SALT 127 +#define PIDX_MAC_PARAM_SIZE 74 +#define PIDX_MAC_PARAM_TLS_DATA_SIZE 161 +#define PIDX_MAC_PARAM_XOF 76 +#define PIDX_OBJECT_PARAM_DATA 111 +#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 162 +#define PIDX_OBJECT_PARAM_DATA_TYPE 163 +#define PIDX_OBJECT_PARAM_DESC 164 +#define PIDX_OBJECT_PARAM_REFERENCE 165 +#define PIDX_OBJECT_PARAM_TYPE 134 +#define PIDX_PASSPHRASE_PARAM_INFO 115 +#define PIDX_PKEY_PARAM_BITS 166 +#define PIDX_PKEY_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 167 +#define PIDX_PKEY_PARAM_DHKEM_IKM 168 +#define PIDX_PKEY_PARAM_DH_GENERATOR 169 +#define PIDX_PKEY_PARAM_DH_PRIV_LEN 170 +#define PIDX_PKEY_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_PKEY_PARAM_DIGEST_SIZE 171 +#define PIDX_PKEY_PARAM_DIST_ID 172 +#define PIDX_PKEY_PARAM_EC_A 173 +#define PIDX_PKEY_PARAM_EC_B 174 +#define PIDX_PKEY_PARAM_EC_CHAR2_M 175 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 176 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 177 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 178 +#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 179 +#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 180 +#define PIDX_PKEY_PARAM_EC_COFACTOR 181 +#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 182 +#define PIDX_PKEY_PARAM_EC_ENCODING 183 +#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 184 +#define PIDX_PKEY_PARAM_EC_GENERATOR 185 +#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 186 +#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 187 +#define PIDX_PKEY_PARAM_EC_ORDER 188 +#define PIDX_PKEY_PARAM_EC_P 130 +#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 189 +#define PIDX_PKEY_PARAM_EC_PUB_X 190 +#define PIDX_PKEY_PARAM_EC_PUB_Y 191 +#define PIDX_PKEY_PARAM_EC_SEED 132 +#define 
PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 192 +#define PIDX_PKEY_PARAM_ENGINE PIDX_ALG_PARAM_ENGINE +#define PIDX_PKEY_PARAM_FFC_COFACTOR 193 +#define PIDX_PKEY_PARAM_FFC_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_PKEY_PARAM_FFC_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_FFC_G 194 +#define PIDX_PKEY_PARAM_FFC_GINDEX 195 +#define PIDX_PKEY_PARAM_FFC_H 196 +#define PIDX_PKEY_PARAM_FFC_P 130 +#define PIDX_PKEY_PARAM_FFC_PBITS 197 +#define PIDX_PKEY_PARAM_FFC_PCOUNTER 198 +#define PIDX_PKEY_PARAM_FFC_Q 199 +#define PIDX_PKEY_PARAM_FFC_QBITS 200 +#define PIDX_PKEY_PARAM_FFC_SEED 132 +#define PIDX_PKEY_PARAM_FFC_TYPE 134 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 201 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 202 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 203 +#define PIDX_PKEY_PARAM_GROUP_NAME 204 +#define PIDX_PKEY_PARAM_IMPLICIT_REJECTION 5 +#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 205 +#define PIDX_PKEY_PARAM_MASKGENFUNC 206 +#define PIDX_PKEY_PARAM_MAX_SIZE 207 +#define PIDX_PKEY_PARAM_MGF1_DIGEST 208 +#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 209 +#define PIDX_PKEY_PARAM_PAD_MODE 210 +#define PIDX_PKEY_PARAM_PRIV_KEY 211 +#define PIDX_PKEY_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_PUB_KEY 212 +#define PIDX_PKEY_PARAM_RSA_BITS PIDX_PKEY_PARAM_BITS +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 213 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 214 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 215 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 216 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 217 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 218 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 219 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 220 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 221 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 222 +#define PIDX_PKEY_PARAM_RSA_D 223 +#define PIDX_PKEY_PARAM_RSA_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_PKEY_PARAM_RSA_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_RSA_E 224 +#define PIDX_PKEY_PARAM_RSA_EXPONENT 225 +#define PIDX_PKEY_PARAM_RSA_EXPONENT1 226 +#define PIDX_PKEY_PARAM_RSA_EXPONENT10 227 +#define PIDX_PKEY_PARAM_RSA_EXPONENT2 228 +#define PIDX_PKEY_PARAM_RSA_EXPONENT3 229 +#define PIDX_PKEY_PARAM_RSA_EXPONENT4 230 +#define PIDX_PKEY_PARAM_RSA_EXPONENT5 231 +#define PIDX_PKEY_PARAM_RSA_EXPONENT6 232 +#define PIDX_PKEY_PARAM_RSA_EXPONENT7 233 +#define PIDX_PKEY_PARAM_RSA_EXPONENT8 234 +#define PIDX_PKEY_PARAM_RSA_EXPONENT9 235 +#define PIDX_PKEY_PARAM_RSA_FACTOR 236 +#define PIDX_PKEY_PARAM_RSA_FACTOR1 237 +#define PIDX_PKEY_PARAM_RSA_FACTOR10 238 +#define PIDX_PKEY_PARAM_RSA_FACTOR2 239 +#define PIDX_PKEY_PARAM_RSA_FACTOR3 240 +#define PIDX_PKEY_PARAM_RSA_FACTOR4 241 +#define PIDX_PKEY_PARAM_RSA_FACTOR5 242 +#define PIDX_PKEY_PARAM_RSA_FACTOR6 243 +#define PIDX_PKEY_PARAM_RSA_FACTOR7 244 +#define PIDX_PKEY_PARAM_RSA_FACTOR8 245 +#define PIDX_PKEY_PARAM_RSA_FACTOR9 246 +#define PIDX_PKEY_PARAM_RSA_MASKGENFUNC PIDX_PKEY_PARAM_MASKGENFUNC +#define PIDX_PKEY_PARAM_RSA_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_PKEY_PARAM_RSA_N 129 +#define PIDX_PKEY_PARAM_RSA_PRIMES 247 +#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 248 +#define PIDX_PKEY_PARAM_RSA_TEST_P1 249 +#define PIDX_PKEY_PARAM_RSA_TEST_P2 250 +#define PIDX_PKEY_PARAM_RSA_TEST_Q1 251 +#define PIDX_PKEY_PARAM_RSA_TEST_Q2 252 +#define PIDX_PKEY_PARAM_RSA_TEST_XP 253 +#define PIDX_PKEY_PARAM_RSA_TEST_XP1 254 +#define PIDX_PKEY_PARAM_RSA_TEST_XP2 255 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ 256 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 257 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 
258 +#define PIDX_PKEY_PARAM_SECURITY_BITS 259 +#define PIDX_PKEY_PARAM_USE_COFACTOR_ECDH PIDX_PKEY_PARAM_USE_COFACTOR_FLAG +#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 260 +#define PIDX_PROV_PARAM_BUILDINFO 261 +#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 262 +#define PIDX_PROV_PARAM_CORE_PROV_NAME 263 +#define PIDX_PROV_PARAM_CORE_VERSION 264 +#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 265 +#define PIDX_PROV_PARAM_NAME 266 +#define PIDX_PROV_PARAM_SECURITY_CHECKS 267 +#define PIDX_PROV_PARAM_SELF_TEST_DESC 268 +#define PIDX_PROV_PARAM_SELF_TEST_PHASE 269 +#define PIDX_PROV_PARAM_SELF_TEST_TYPE 270 +#define PIDX_PROV_PARAM_STATUS 271 +#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 272 +#define PIDX_PROV_PARAM_VERSION 108 +#define PIDX_RAND_PARAM_GENERATE 273 +#define PIDX_RAND_PARAM_MAX_REQUEST 274 +#define PIDX_RAND_PARAM_STATE 275 +#define PIDX_RAND_PARAM_STRENGTH 276 +#define PIDX_RAND_PARAM_TEST_ENTROPY 277 +#define PIDX_RAND_PARAM_TEST_NONCE 278 +#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID 279 +#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 280 +#define PIDX_SIGNATURE_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_SIGNATURE_PARAM_DIGEST_SIZE PIDX_PKEY_PARAM_DIGEST_SIZE +#define PIDX_SIGNATURE_PARAM_INSTANCE 281 +#define PIDX_SIGNATURE_PARAM_KAT 282 +#define PIDX_SIGNATURE_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_SIGNATURE_PARAM_MGF1_PROPERTIES PIDX_PKEY_PARAM_MGF1_PROPERTIES +#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 283 +#define PIDX_SIGNATURE_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE +#define PIDX_SIGNATURE_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 248 +#define PIDX_STORE_PARAM_ALIAS 284 +#define PIDX_STORE_PARAM_DIGEST 1 +#define PIDX_STORE_PARAM_EXPECT 285 +#define PIDX_STORE_PARAM_FINGERPRINT 286 +#define PIDX_STORE_PARAM_INPUT_TYPE 287 +#define PIDX_STORE_PARAM_ISSUER 266 +#define PIDX_STORE_PARAM_PROPERTIES 4 +#define PIDX_STORE_PARAM_SERIAL 288 +#define PIDX_STORE_PARAM_SUBJECT 289 diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1.h new file mode 100644 index 00000000000..09712345941 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1.h @@ -0,0 +1,1133 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/asn1.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ASN1_H +# define OPENSSL_ASN1_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_ASN1_H +# endif + +# ifndef OPENSSL_NO_STDIO +# include +# endif +# include +# include +# include +# include +# include +# include +# include + +# include +# include + +# ifdef OPENSSL_BUILD_SHLIBCRYPTO +# undef OPENSSL_EXTERN +# define OPENSSL_EXTERN OPENSSL_EXPORT +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define V_ASN1_UNIVERSAL 0x00 +# define V_ASN1_APPLICATION 0x40 +# define V_ASN1_CONTEXT_SPECIFIC 0x80 +# define V_ASN1_PRIVATE 0xc0 + +# define V_ASN1_CONSTRUCTED 0x20 +# define V_ASN1_PRIMITIVE_TAG 0x1f +# define V_ASN1_PRIMATIVE_TAG /*compat*/ V_ASN1_PRIMITIVE_TAG + +# define V_ASN1_APP_CHOOSE -2/* let the recipient choose */ +# define V_ASN1_OTHER -3/* used in ASN1_TYPE */ +# define V_ASN1_ANY -4/* used in ASN1 template code */ + +# define V_ASN1_UNDEF -1 +/* ASN.1 tag values */ +# define V_ASN1_EOC 0 +# define V_ASN1_BOOLEAN 1 /**/ +# define V_ASN1_INTEGER 2 +# define V_ASN1_BIT_STRING 3 +# define V_ASN1_OCTET_STRING 4 +# define V_ASN1_NULL 5 +# define V_ASN1_OBJECT 6 +# define V_ASN1_OBJECT_DESCRIPTOR 7 +# define V_ASN1_EXTERNAL 8 +# define V_ASN1_REAL 9 +# define V_ASN1_ENUMERATED 10 +# define V_ASN1_UTF8STRING 12 +# define V_ASN1_SEQUENCE 16 +# define V_ASN1_SET 17 +# define V_ASN1_NUMERICSTRING 18 /**/ +# define V_ASN1_PRINTABLESTRING 19 +# define V_ASN1_T61STRING 20 +# define V_ASN1_TELETEXSTRING 20/* alias */ +# define V_ASN1_VIDEOTEXSTRING 21 /**/ +# define V_ASN1_IA5STRING 22 +# define V_ASN1_UTCTIME 23 +# define V_ASN1_GENERALIZEDTIME 24 /**/ +# define V_ASN1_GRAPHICSTRING 25 /**/ +# define V_ASN1_ISO64STRING 26 /**/ +# define V_ASN1_VISIBLESTRING 26/* alias */ +# define V_ASN1_GENERALSTRING 27 /**/ +# define V_ASN1_UNIVERSALSTRING 28 /**/ +# define V_ASN1_BMPSTRING 30 + +/* + * NB the constants below are used internally by ASN1_INTEGER + * and ASN1_ENUMERATED to indicate the sign. They are *not* on + * the wire tag values. 
+ */ + +# define V_ASN1_NEG 0x100 +# define V_ASN1_NEG_INTEGER (2 | V_ASN1_NEG) +# define V_ASN1_NEG_ENUMERATED (10 | V_ASN1_NEG) + +/* For use with d2i_ASN1_type_bytes() */ +# define B_ASN1_NUMERICSTRING 0x0001 +# define B_ASN1_PRINTABLESTRING 0x0002 +# define B_ASN1_T61STRING 0x0004 +# define B_ASN1_TELETEXSTRING 0x0004 +# define B_ASN1_VIDEOTEXSTRING 0x0008 +# define B_ASN1_IA5STRING 0x0010 +# define B_ASN1_GRAPHICSTRING 0x0020 +# define B_ASN1_ISO64STRING 0x0040 +# define B_ASN1_VISIBLESTRING 0x0040 +# define B_ASN1_GENERALSTRING 0x0080 +# define B_ASN1_UNIVERSALSTRING 0x0100 +# define B_ASN1_OCTET_STRING 0x0200 +# define B_ASN1_BIT_STRING 0x0400 +# define B_ASN1_BMPSTRING 0x0800 +# define B_ASN1_UNKNOWN 0x1000 +# define B_ASN1_UTF8STRING 0x2000 +# define B_ASN1_UTCTIME 0x4000 +# define B_ASN1_GENERALIZEDTIME 0x8000 +# define B_ASN1_SEQUENCE 0x10000 +/* For use with ASN1_mbstring_copy() */ +# define MBSTRING_FLAG 0x1000 +# define MBSTRING_UTF8 (MBSTRING_FLAG) +# define MBSTRING_ASC (MBSTRING_FLAG|1) +# define MBSTRING_BMP (MBSTRING_FLAG|2) +# define MBSTRING_UNIV (MBSTRING_FLAG|4) +# define SMIME_OLDMIME 0x400 +# define SMIME_CRLFEOL 0x800 +# define SMIME_STREAM 0x1000 + +/* Stacks for types not otherwise defined in this header */ +SKM_DEFINE_STACK_OF_INTERNAL(X509_ALGOR, X509_ALGOR, X509_ALGOR) +#define sk_X509_ALGOR_num(sk) OPENSSL_sk_num(ossl_check_const_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_value(sk, idx) ((X509_ALGOR *)OPENSSL_sk_value(ossl_check_const_X509_ALGOR_sk_type(sk), (idx))) +#define sk_X509_ALGOR_new(cmp) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new(ossl_check_X509_ALGOR_compfunc_type(cmp))) +#define sk_X509_ALGOR_new_null() ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new_null()) +#define sk_X509_ALGOR_new_reserve(cmp, n) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new_reserve(ossl_check_X509_ALGOR_compfunc_type(cmp), (n))) +#define sk_X509_ALGOR_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_ALGOR_sk_type(sk), (n)) +#define sk_X509_ALGOR_free(sk) OPENSSL_sk_free(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_zero(sk) OPENSSL_sk_zero(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_delete(sk, i) ((X509_ALGOR *)OPENSSL_sk_delete(ossl_check_X509_ALGOR_sk_type(sk), (i))) +#define sk_X509_ALGOR_delete_ptr(sk, ptr) ((X509_ALGOR *)OPENSSL_sk_delete_ptr(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr))) +#define sk_X509_ALGOR_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_pop(sk) ((X509_ALGOR *)OPENSSL_sk_pop(ossl_check_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_shift(sk) ((X509_ALGOR *)OPENSSL_sk_shift(ossl_check_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_ALGOR_sk_type(sk),ossl_check_X509_ALGOR_freefunc_type(freefunc)) +#define sk_X509_ALGOR_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr), (idx)) +#define sk_X509_ALGOR_set(sk, idx, ptr) ((X509_ALGOR *)OPENSSL_sk_set(ossl_check_X509_ALGOR_sk_type(sk), (idx), ossl_check_X509_ALGOR_type(ptr))) +#define sk_X509_ALGOR_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_find_all(sk, ptr, 
pnum) OPENSSL_sk_find_all(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr), pnum) +#define sk_X509_ALGOR_sort(sk) OPENSSL_sk_sort(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_dup(sk) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_dup(ossl_check_const_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_copyfunc_type(copyfunc), ossl_check_X509_ALGOR_freefunc_type(freefunc))) +#define sk_X509_ALGOR_set_cmp_func(sk, cmp) ((sk_X509_ALGOR_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_compfunc_type(cmp))) + + + +# define ASN1_STRING_FLAG_BITS_LEFT 0x08 /* Set if 0x07 has bits left value */ +/* + * This indicates that the ASN1_STRING is not a real value but just a place + * holder for the location where indefinite length constructed data should be + * inserted in the memory buffer + */ +# define ASN1_STRING_FLAG_NDEF 0x010 + +/* + * This flag is used by the CMS code to indicate that a string is not + * complete and is a place holder for content when it had all been accessed. + * The flag will be reset when content has been written to it. + */ + +# define ASN1_STRING_FLAG_CONT 0x020 +/* + * This flag is used by ASN1 code to indicate an ASN1_STRING is an MSTRING + * type. + */ +# define ASN1_STRING_FLAG_MSTRING 0x040 +/* String is embedded and only content should be freed */ +# define ASN1_STRING_FLAG_EMBED 0x080 +/* String should be parsed in RFC 5280's time format */ +# define ASN1_STRING_FLAG_X509_TIME 0x100 +/* This is the base type that holds just about everything :-) */ +struct asn1_string_st { + int length; + int type; + unsigned char *data; + /* + * The value of the following field depends on the type being held. It + * is mostly being used for BIT_STRING so if the input data has a + * non-zero 'unused bits' value, it will be handled correctly + */ + long flags; +}; + +/* + * ASN1_ENCODING structure: this is used to save the received encoding of an + * ASN1 type. This is useful to get round problems with invalid encodings + * which can break signatures. + */ + +typedef struct ASN1_ENCODING_st { + unsigned char *enc; /* DER encoding */ + long len; /* Length of encoding */ + int modified; /* set to 1 if 'enc' is invalid */ +} ASN1_ENCODING; + +/* Used with ASN1 LONG type: if a long is set to this it is omitted */ +# define ASN1_LONG_UNDEF 0x7fffffffL + +# define STABLE_FLAGS_MALLOC 0x01 +/* + * A zero passed to ASN1_STRING_TABLE_new_add for the flags is interpreted + * as "don't change" and STABLE_FLAGS_MALLOC is always set. By setting + * STABLE_FLAGS_MALLOC only we can clear the existing value. Use the alias + * STABLE_FLAGS_CLEAR to reflect this. 
+ */ +# define STABLE_FLAGS_CLEAR STABLE_FLAGS_MALLOC +# define STABLE_NO_MASK 0x02 +# define DIRSTRING_TYPE \ + (B_ASN1_PRINTABLESTRING|B_ASN1_T61STRING|B_ASN1_BMPSTRING|B_ASN1_UTF8STRING) +# define PKCS9STRING_TYPE (DIRSTRING_TYPE|B_ASN1_IA5STRING) + +struct asn1_string_table_st { + int nid; + long minsize; + long maxsize; + unsigned long mask; + unsigned long flags; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_STRING_TABLE, ASN1_STRING_TABLE, ASN1_STRING_TABLE) +#define sk_ASN1_STRING_TABLE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_value(sk, idx) ((ASN1_STRING_TABLE *)OPENSSL_sk_value(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk), (idx))) +#define sk_ASN1_STRING_TABLE_new(cmp) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new(ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp))) +#define sk_ASN1_STRING_TABLE_new_null() ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_STRING_TABLE_new_reserve(cmp, n) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp), (n))) +#define sk_ASN1_STRING_TABLE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (n)) +#define sk_ASN1_STRING_TABLE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_delete(sk, i) ((ASN1_STRING_TABLE *)OPENSSL_sk_delete(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (i))) +#define sk_ASN1_STRING_TABLE_delete_ptr(sk, ptr) ((ASN1_STRING_TABLE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr))) +#define sk_ASN1_STRING_TABLE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_pop(sk) ((ASN1_STRING_TABLE *)OPENSSL_sk_pop(ossl_check_ASN1_STRING_TABLE_sk_type(sk))) +#define sk_ASN1_STRING_TABLE_shift(sk) ((ASN1_STRING_TABLE *)OPENSSL_sk_shift(ossl_check_ASN1_STRING_TABLE_sk_type(sk))) +#define sk_ASN1_STRING_TABLE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_STRING_TABLE_sk_type(sk),ossl_check_ASN1_STRING_TABLE_freefunc_type(freefunc)) +#define sk_ASN1_STRING_TABLE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr), (idx)) +#define sk_ASN1_STRING_TABLE_set(sk, idx, ptr) ((ASN1_STRING_TABLE *)OPENSSL_sk_set(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (idx), ossl_check_ASN1_STRING_TABLE_type(ptr))) +#define sk_ASN1_STRING_TABLE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr), pnum) +#define sk_ASN1_STRING_TABLE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_dup(sk) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk))) +#define 
sk_ASN1_STRING_TABLE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_copyfunc_type(copyfunc), ossl_check_ASN1_STRING_TABLE_freefunc_type(freefunc))) +#define sk_ASN1_STRING_TABLE_set_cmp_func(sk, cmp) ((sk_ASN1_STRING_TABLE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp))) + + +/* size limits: this stuff is taken straight from RFC2459 */ + +# define ub_name 32768 +# define ub_common_name 64 +# define ub_locality_name 128 +# define ub_state_name 128 +# define ub_organization_name 64 +# define ub_organization_unit_name 64 +# define ub_title 64 +# define ub_email_address 128 + +/* + * Declarations for template structures: for full definitions see asn1t.h + */ +typedef struct ASN1_TEMPLATE_st ASN1_TEMPLATE; +typedef struct ASN1_TLC_st ASN1_TLC; +/* This is just an opaque pointer */ +typedef struct ASN1_VALUE_st ASN1_VALUE; + +/* Declare ASN1 functions: the implement macro in in asn1t.h */ + +/* + * The mysterious 'extern' that's passed to some macros is innocuous, + * and is there to quiet pre-C99 compilers that may complain about empty + * arguments in macro calls. + */ + +# define DECLARE_ASN1_FUNCTIONS_attr(attr, type) \ + DECLARE_ASN1_FUNCTIONS_name_attr(attr, type, type) +# define DECLARE_ASN1_FUNCTIONS(type) \ + DECLARE_ASN1_FUNCTIONS_attr(extern, type) + +# define DECLARE_ASN1_ALLOC_FUNCTIONS_attr(attr, type) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, type) +# define DECLARE_ASN1_ALLOC_FUNCTIONS(type) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_attr(extern, type) + +# define DECLARE_ASN1_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(attr, type, name) +# define DECLARE_ASN1_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_attr(attr, type, itname, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(attr, type, name) \ + DECLARE_ASN1_ITEM_attr(attr, itname) +# define DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_attr(extern, type, itname, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_attr(attr, type, name, name) +# define DECLARE_ASN1_ENCODE_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(attr, type, name) \ + attr type *d2i_##name(type **a, const unsigned char **in, long len); \ + attr int i2d_##name(const type *a, unsigned char **out); +# define DECLARE_ASN1_ENCODE_FUNCTIONS_only(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(extern, type, name) + +# define DECLARE_ASN1_NDEF_FUNCTION_attr(attr, name) \ + attr int i2d_##name##_NDEF(const name *a, unsigned char **out); +# define DECLARE_ASN1_NDEF_FUNCTION(name) \ + DECLARE_ASN1_NDEF_FUNCTION_attr(extern, name) + +# define DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, name) \ + attr type *name##_new(void); \ + attr void name##_free(type *a); +# define DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_DUP_FUNCTION_attr(attr, type) \ + DECLARE_ASN1_DUP_FUNCTION_name_attr(attr, type, type) +# define DECLARE_ASN1_DUP_FUNCTION(type) \ + DECLARE_ASN1_DUP_FUNCTION_attr(extern, type) + +# define 
DECLARE_ASN1_DUP_FUNCTION_name_attr(attr, type, name) \ + attr type *name##_dup(const type *a); +# define DECLARE_ASN1_DUP_FUNCTION_name(type, name) \ + DECLARE_ASN1_DUP_FUNCTION_name_attr(extern, type, name) + +# define DECLARE_ASN1_PRINT_FUNCTION_attr(attr, stname) \ + DECLARE_ASN1_PRINT_FUNCTION_fname_attr(attr, stname, stname) +# define DECLARE_ASN1_PRINT_FUNCTION(stname) \ + DECLARE_ASN1_PRINT_FUNCTION_attr(extern, stname) + +# define DECLARE_ASN1_PRINT_FUNCTION_fname_attr(attr, stname, fname) \ + attr int fname##_print_ctx(BIO *out, const stname *x, int indent, \ + const ASN1_PCTX *pctx); +# define DECLARE_ASN1_PRINT_FUNCTION_fname(stname, fname) \ + DECLARE_ASN1_PRINT_FUNCTION_fname_attr(extern, stname, fname) + +# define D2I_OF(type) type *(*)(type **,const unsigned char **,long) +# define I2D_OF(type) int (*)(const type *,unsigned char **) + +# define CHECKED_D2I_OF(type, d2i) \ + ((d2i_of_void*) (1 ? d2i : ((D2I_OF(type))0))) +# define CHECKED_I2D_OF(type, i2d) \ + ((i2d_of_void*) (1 ? i2d : ((I2D_OF(type))0))) +# define CHECKED_NEW_OF(type, xnew) \ + ((void *(*)(void)) (1 ? xnew : ((type *(*)(void))0))) +# define CHECKED_PTR_OF(type, p) \ + ((void*) (1 ? p : (type*)0)) +# define CHECKED_PPTR_OF(type, p) \ + ((void**) (1 ? p : (type**)0)) + +# define TYPEDEF_D2I_OF(type) typedef type *d2i_of_##type(type **,const unsigned char **,long) +# define TYPEDEF_I2D_OF(type) typedef int i2d_of_##type(const type *,unsigned char **) +# define TYPEDEF_D2I2D_OF(type) TYPEDEF_D2I_OF(type); TYPEDEF_I2D_OF(type) + +typedef void *d2i_of_void(void **, const unsigned char **, long); +typedef int i2d_of_void(const void *, unsigned char **); + +/*- + * The following macros and typedefs allow an ASN1_ITEM + * to be embedded in a structure and referenced. Since + * the ASN1_ITEM pointers need to be globally accessible + * (possibly from shared libraries) they may exist in + * different forms. On platforms that support it the + * ASN1_ITEM structure itself will be globally exported. + * Other platforms will export a function that returns + * an ASN1_ITEM pointer. + * + * To handle both cases transparently the macros below + * should be used instead of hard coding an ASN1_ITEM + * pointer in a structure. + * + * The structure will look like this: + * + * typedef struct SOMETHING_st { + * ... + * ASN1_ITEM_EXP *iptr; + * ... + * } SOMETHING; + * + * It would be initialised as e.g.: + * + * SOMETHING somevar = {...,ASN1_ITEM_ref(X509),...}; + * + * and the actual pointer extracted with: + * + * const ASN1_ITEM *it = ASN1_ITEM_ptr(somevar.iptr); + * + * Finally an ASN1_ITEM pointer can be extracted from an + * appropriate reference with: ASN1_ITEM_rptr(X509). This + * would be used when a function takes an ASN1_ITEM * argument. + * + */ + + +/* + * Platforms that can't easily handle shared global variables are declared as + * functions returning ASN1_ITEM pointers. 
+ */ + +/* ASN1_ITEM pointer exported type */ +typedef const ASN1_ITEM *ASN1_ITEM_EXP (void); + +/* Macro to obtain ASN1_ITEM pointer from exported type */ +# define ASN1_ITEM_ptr(iptr) (iptr()) + +/* Macro to include ASN1_ITEM pointer from base type */ +# define ASN1_ITEM_ref(iptr) (iptr##_it) + +# define ASN1_ITEM_rptr(ref) (ref##_it()) + +# define DECLARE_ASN1_ITEM_attr(attr, name) \ + attr const ASN1_ITEM * name##_it(void); +# define DECLARE_ASN1_ITEM(name) \ + DECLARE_ASN1_ITEM_attr(extern, name) + +/* Parameters used by ASN1_STRING_print_ex() */ + +/* + * These determine which characters to escape: RFC2253 special characters, + * control characters and MSB set characters + */ + +# define ASN1_STRFLGS_ESC_2253 1 +# define ASN1_STRFLGS_ESC_CTRL 2 +# define ASN1_STRFLGS_ESC_MSB 4 + +/* Lower 8 bits are reserved as an output type specifier */ +# define ASN1_DTFLGS_TYPE_MASK 0x0FUL +# define ASN1_DTFLGS_RFC822 0x00UL +# define ASN1_DTFLGS_ISO8601 0x01UL + +/* + * This flag determines how we do escaping: normally RC2253 backslash only, + * set this to use backslash and quote. + */ + +# define ASN1_STRFLGS_ESC_QUOTE 8 + +/* These three flags are internal use only. */ + +/* Character is a valid PrintableString character */ +# define CHARTYPE_PRINTABLESTRING 0x10 +/* Character needs escaping if it is the first character */ +# define CHARTYPE_FIRST_ESC_2253 0x20 +/* Character needs escaping if it is the last character */ +# define CHARTYPE_LAST_ESC_2253 0x40 + +/* + * NB the internal flags are safely reused below by flags handled at the top + * level. + */ + +/* + * If this is set we convert all character strings to UTF8 first + */ + +# define ASN1_STRFLGS_UTF8_CONVERT 0x10 + +/* + * If this is set we don't attempt to interpret content: just assume all + * strings are 1 byte per character. This will produce some pretty odd + * looking output! + */ + +# define ASN1_STRFLGS_IGNORE_TYPE 0x20 + +/* If this is set we include the string type in the output */ +# define ASN1_STRFLGS_SHOW_TYPE 0x40 + +/* + * This determines which strings to display and which to 'dump' (hex dump of + * content octets or DER encoding). We can only dump non character strings or + * everything. If we don't dump 'unknown' they are interpreted as character + * strings with 1 octet per character and are subject to the usual escaping + * options. + */ + +# define ASN1_STRFLGS_DUMP_ALL 0x80 +# define ASN1_STRFLGS_DUMP_UNKNOWN 0x100 + +/* + * These determine what 'dumping' does, we can dump the content octets or the + * DER encoding: both use the RFC2253 #XXXXX notation. + */ + +# define ASN1_STRFLGS_DUMP_DER 0x200 + +/* + * This flag specifies that RC2254 escaping shall be performed. + */ +#define ASN1_STRFLGS_ESC_2254 0x400 + +/* + * All the string flags consistent with RFC2253, escaping control characters + * isn't essential in RFC2253 but it is advisable anyway. 
+ */ + +# define ASN1_STRFLGS_RFC2253 (ASN1_STRFLGS_ESC_2253 | \ + ASN1_STRFLGS_ESC_CTRL | \ + ASN1_STRFLGS_ESC_MSB | \ + ASN1_STRFLGS_UTF8_CONVERT | \ + ASN1_STRFLGS_DUMP_UNKNOWN | \ + ASN1_STRFLGS_DUMP_DER) + + +struct asn1_type_st { + int type; + union { + char *ptr; + ASN1_BOOLEAN boolean; + ASN1_STRING *asn1_string; + ASN1_OBJECT *object; + ASN1_INTEGER *integer; + ASN1_ENUMERATED *enumerated; + ASN1_BIT_STRING *bit_string; + ASN1_OCTET_STRING *octet_string; + ASN1_PRINTABLESTRING *printablestring; + ASN1_T61STRING *t61string; + ASN1_IA5STRING *ia5string; + ASN1_GENERALSTRING *generalstring; + ASN1_BMPSTRING *bmpstring; + ASN1_UNIVERSALSTRING *universalstring; + ASN1_UTCTIME *utctime; + ASN1_GENERALIZEDTIME *generalizedtime; + ASN1_VISIBLESTRING *visiblestring; + ASN1_UTF8STRING *utf8string; + /* + * set and sequence are left complete and still contain the set or + * sequence bytes + */ + ASN1_STRING *set; + ASN1_STRING *sequence; + ASN1_VALUE *asn1_value; + } value; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_TYPE, ASN1_TYPE, ASN1_TYPE) +#define sk_ASN1_TYPE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_value(sk, idx) ((ASN1_TYPE *)OPENSSL_sk_value(ossl_check_const_ASN1_TYPE_sk_type(sk), (idx))) +#define sk_ASN1_TYPE_new(cmp) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new(ossl_check_ASN1_TYPE_compfunc_type(cmp))) +#define sk_ASN1_TYPE_new_null() ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_TYPE_new_reserve(cmp, n) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_TYPE_compfunc_type(cmp), (n))) +#define sk_ASN1_TYPE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_TYPE_sk_type(sk), (n)) +#define sk_ASN1_TYPE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_delete(sk, i) ((ASN1_TYPE *)OPENSSL_sk_delete(ossl_check_ASN1_TYPE_sk_type(sk), (i))) +#define sk_ASN1_TYPE_delete_ptr(sk, ptr) ((ASN1_TYPE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr))) +#define sk_ASN1_TYPE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_pop(sk) ((ASN1_TYPE *)OPENSSL_sk_pop(ossl_check_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_shift(sk) ((ASN1_TYPE *)OPENSSL_sk_shift(ossl_check_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_TYPE_sk_type(sk),ossl_check_ASN1_TYPE_freefunc_type(freefunc)) +#define sk_ASN1_TYPE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr), (idx)) +#define sk_ASN1_TYPE_set(sk, idx, ptr) ((ASN1_TYPE *)OPENSSL_sk_set(ossl_check_ASN1_TYPE_sk_type(sk), (idx), ossl_check_ASN1_TYPE_type(ptr))) +#define sk_ASN1_TYPE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr), pnum) +#define sk_ASN1_TYPE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_dup(sk) 
((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_copyfunc_type(copyfunc), ossl_check_ASN1_TYPE_freefunc_type(freefunc))) +#define sk_ASN1_TYPE_set_cmp_func(sk, cmp) ((sk_ASN1_TYPE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_compfunc_type(cmp))) + + +typedef STACK_OF(ASN1_TYPE) ASN1_SEQUENCE_ANY; + +DECLARE_ASN1_ENCODE_FUNCTIONS_name(ASN1_SEQUENCE_ANY, ASN1_SEQUENCE_ANY) +DECLARE_ASN1_ENCODE_FUNCTIONS_name(ASN1_SEQUENCE_ANY, ASN1_SET_ANY) + +/* This is used to contain a list of bit names */ +typedef struct BIT_STRING_BITNAME_st { + int bitnum; + const char *lname; + const char *sname; +} BIT_STRING_BITNAME; + +# define B_ASN1_TIME \ + B_ASN1_UTCTIME | \ + B_ASN1_GENERALIZEDTIME + +# define B_ASN1_PRINTABLE \ + B_ASN1_NUMERICSTRING| \ + B_ASN1_PRINTABLESTRING| \ + B_ASN1_T61STRING| \ + B_ASN1_IA5STRING| \ + B_ASN1_BIT_STRING| \ + B_ASN1_UNIVERSALSTRING|\ + B_ASN1_BMPSTRING|\ + B_ASN1_UTF8STRING|\ + B_ASN1_SEQUENCE|\ + B_ASN1_UNKNOWN + +# define B_ASN1_DIRECTORYSTRING \ + B_ASN1_PRINTABLESTRING| \ + B_ASN1_TELETEXSTRING|\ + B_ASN1_BMPSTRING|\ + B_ASN1_UNIVERSALSTRING|\ + B_ASN1_UTF8STRING + +# define B_ASN1_DISPLAYTEXT \ + B_ASN1_IA5STRING| \ + B_ASN1_VISIBLESTRING| \ + B_ASN1_BMPSTRING|\ + B_ASN1_UTF8STRING + +DECLARE_ASN1_ALLOC_FUNCTIONS_name(ASN1_TYPE, ASN1_TYPE) +DECLARE_ASN1_ENCODE_FUNCTIONS(ASN1_TYPE, ASN1_ANY, ASN1_TYPE) + +int ASN1_TYPE_get(const ASN1_TYPE *a); +void ASN1_TYPE_set(ASN1_TYPE *a, int type, void *value); +int ASN1_TYPE_set1(ASN1_TYPE *a, int type, const void *value); +int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b); + +ASN1_TYPE *ASN1_TYPE_pack_sequence(const ASN1_ITEM *it, void *s, ASN1_TYPE **t); +void *ASN1_TYPE_unpack_sequence(const ASN1_ITEM *it, const ASN1_TYPE *t); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_OBJECT, ASN1_OBJECT, ASN1_OBJECT) +#define sk_ASN1_OBJECT_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_value(sk, idx) ((ASN1_OBJECT *)OPENSSL_sk_value(ossl_check_const_ASN1_OBJECT_sk_type(sk), (idx))) +#define sk_ASN1_OBJECT_new(cmp) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new(ossl_check_ASN1_OBJECT_compfunc_type(cmp))) +#define sk_ASN1_OBJECT_new_null() ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new_null()) +#define sk_ASN1_OBJECT_new_reserve(cmp, n) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_OBJECT_compfunc_type(cmp), (n))) +#define sk_ASN1_OBJECT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_OBJECT_sk_type(sk), (n)) +#define sk_ASN1_OBJECT_free(sk) OPENSSL_sk_free(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_delete(sk, i) ((ASN1_OBJECT *)OPENSSL_sk_delete(ossl_check_ASN1_OBJECT_sk_type(sk), (i))) +#define sk_ASN1_OBJECT_delete_ptr(sk, ptr) ((ASN1_OBJECT *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr))) +#define sk_ASN1_OBJECT_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_pop(sk) ((ASN1_OBJECT *)OPENSSL_sk_pop(ossl_check_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_shift(sk) ((ASN1_OBJECT 
*)OPENSSL_sk_shift(ossl_check_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_OBJECT_sk_type(sk),ossl_check_ASN1_OBJECT_freefunc_type(freefunc)) +#define sk_ASN1_OBJECT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr), (idx)) +#define sk_ASN1_OBJECT_set(sk, idx, ptr) ((ASN1_OBJECT *)OPENSSL_sk_set(ossl_check_ASN1_OBJECT_sk_type(sk), (idx), ossl_check_ASN1_OBJECT_type(ptr))) +#define sk_ASN1_OBJECT_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr), pnum) +#define sk_ASN1_OBJECT_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_dup(sk) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_dup(ossl_check_const_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_copyfunc_type(copyfunc), ossl_check_ASN1_OBJECT_freefunc_type(freefunc))) +#define sk_ASN1_OBJECT_set_cmp_func(sk, cmp) ((sk_ASN1_OBJECT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS(ASN1_OBJECT) + +ASN1_STRING *ASN1_STRING_new(void); +void ASN1_STRING_free(ASN1_STRING *a); +void ASN1_STRING_clear_free(ASN1_STRING *a); +int ASN1_STRING_copy(ASN1_STRING *dst, const ASN1_STRING *str); +DECLARE_ASN1_DUP_FUNCTION(ASN1_STRING) +ASN1_STRING *ASN1_STRING_type_new(int type); +int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b); + /* + * Since this is used to store all sorts of things, via macros, for now, + * make its data void * + */ +int ASN1_STRING_set(ASN1_STRING *str, const void *data, int len); +void ASN1_STRING_set0(ASN1_STRING *str, void *data, int len); +int ASN1_STRING_length(const ASN1_STRING *x); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 void ASN1_STRING_length_set(ASN1_STRING *x, int n); +# endif +int ASN1_STRING_type(const ASN1_STRING *x); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 unsigned char *ASN1_STRING_data(ASN1_STRING *x); +# endif +const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *x); + +DECLARE_ASN1_FUNCTIONS(ASN1_BIT_STRING) +int ASN1_BIT_STRING_set(ASN1_BIT_STRING *a, unsigned char *d, int length); +int ASN1_BIT_STRING_set_bit(ASN1_BIT_STRING *a, int n, int value); +int ASN1_BIT_STRING_get_bit(const ASN1_BIT_STRING *a, int n); +int ASN1_BIT_STRING_check(const ASN1_BIT_STRING *a, + const unsigned char *flags, int flags_len); + +int ASN1_BIT_STRING_name_print(BIO *out, ASN1_BIT_STRING *bs, + BIT_STRING_BITNAME *tbl, int indent); +int ASN1_BIT_STRING_num_asc(const char *name, BIT_STRING_BITNAME *tbl); +int ASN1_BIT_STRING_set_asc(ASN1_BIT_STRING *bs, const char *name, int value, + BIT_STRING_BITNAME *tbl); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_INTEGER, ASN1_INTEGER, ASN1_INTEGER) +#define sk_ASN1_INTEGER_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_value(sk, idx) ((ASN1_INTEGER *)OPENSSL_sk_value(ossl_check_const_ASN1_INTEGER_sk_type(sk), 
(idx))) +#define sk_ASN1_INTEGER_new(cmp) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new(ossl_check_ASN1_INTEGER_compfunc_type(cmp))) +#define sk_ASN1_INTEGER_new_null() ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new_null()) +#define sk_ASN1_INTEGER_new_reserve(cmp, n) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_INTEGER_compfunc_type(cmp), (n))) +#define sk_ASN1_INTEGER_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_INTEGER_sk_type(sk), (n)) +#define sk_ASN1_INTEGER_free(sk) OPENSSL_sk_free(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_delete(sk, i) ((ASN1_INTEGER *)OPENSSL_sk_delete(ossl_check_ASN1_INTEGER_sk_type(sk), (i))) +#define sk_ASN1_INTEGER_delete_ptr(sk, ptr) ((ASN1_INTEGER *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr))) +#define sk_ASN1_INTEGER_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_pop(sk) ((ASN1_INTEGER *)OPENSSL_sk_pop(ossl_check_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_shift(sk) ((ASN1_INTEGER *)OPENSSL_sk_shift(ossl_check_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_INTEGER_sk_type(sk),ossl_check_ASN1_INTEGER_freefunc_type(freefunc)) +#define sk_ASN1_INTEGER_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr), (idx)) +#define sk_ASN1_INTEGER_set(sk, idx, ptr) ((ASN1_INTEGER *)OPENSSL_sk_set(ossl_check_ASN1_INTEGER_sk_type(sk), (idx), ossl_check_ASN1_INTEGER_type(ptr))) +#define sk_ASN1_INTEGER_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr), pnum) +#define sk_ASN1_INTEGER_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_dup(sk) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_dup(ossl_check_const_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_copyfunc_type(copyfunc), ossl_check_ASN1_INTEGER_freefunc_type(freefunc))) +#define sk_ASN1_INTEGER_set_cmp_func(sk, cmp) ((sk_ASN1_INTEGER_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_compfunc_type(cmp))) + + + +DECLARE_ASN1_FUNCTIONS(ASN1_INTEGER) +ASN1_INTEGER *d2i_ASN1_UINTEGER(ASN1_INTEGER **a, const unsigned char **pp, + long length); +DECLARE_ASN1_DUP_FUNCTION(ASN1_INTEGER) +int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y); + +DECLARE_ASN1_FUNCTIONS(ASN1_ENUMERATED) + +int ASN1_UTCTIME_check(const ASN1_UTCTIME *a); +ASN1_UTCTIME *ASN1_UTCTIME_set(ASN1_UTCTIME *s, time_t t); +ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t, + int offset_day, long offset_sec); +int ASN1_UTCTIME_set_string(ASN1_UTCTIME *s, const char *str); +int 
ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t); + +int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *a); +ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_set(ASN1_GENERALIZEDTIME *s, + time_t t); +ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_adj(ASN1_GENERALIZEDTIME *s, + time_t t, int offset_day, + long offset_sec); +int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str); + +int ASN1_TIME_diff(int *pday, int *psec, + const ASN1_TIME *from, const ASN1_TIME *to); + +DECLARE_ASN1_FUNCTIONS(ASN1_OCTET_STRING) +DECLARE_ASN1_DUP_FUNCTION(ASN1_OCTET_STRING) +int ASN1_OCTET_STRING_cmp(const ASN1_OCTET_STRING *a, + const ASN1_OCTET_STRING *b); +int ASN1_OCTET_STRING_set(ASN1_OCTET_STRING *str, const unsigned char *data, + int len); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_UTF8STRING, ASN1_UTF8STRING, ASN1_UTF8STRING) +#define sk_ASN1_UTF8STRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_value(sk, idx) ((ASN1_UTF8STRING *)OPENSSL_sk_value(ossl_check_const_ASN1_UTF8STRING_sk_type(sk), (idx))) +#define sk_ASN1_UTF8STRING_new(cmp) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new(ossl_check_ASN1_UTF8STRING_compfunc_type(cmp))) +#define sk_ASN1_UTF8STRING_new_null() ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_UTF8STRING_new_reserve(cmp, n) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_UTF8STRING_compfunc_type(cmp), (n))) +#define sk_ASN1_UTF8STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_UTF8STRING_sk_type(sk), (n)) +#define sk_ASN1_UTF8STRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_delete(sk, i) ((ASN1_UTF8STRING *)OPENSSL_sk_delete(ossl_check_ASN1_UTF8STRING_sk_type(sk), (i))) +#define sk_ASN1_UTF8STRING_delete_ptr(sk, ptr) ((ASN1_UTF8STRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr))) +#define sk_ASN1_UTF8STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_pop(sk) ((ASN1_UTF8STRING *)OPENSSL_sk_pop(ossl_check_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_shift(sk) ((ASN1_UTF8STRING *)OPENSSL_sk_shift(ossl_check_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_UTF8STRING_sk_type(sk),ossl_check_ASN1_UTF8STRING_freefunc_type(freefunc)) +#define sk_ASN1_UTF8STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr), (idx)) +#define sk_ASN1_UTF8STRING_set(sk, idx, ptr) ((ASN1_UTF8STRING *)OPENSSL_sk_set(ossl_check_ASN1_UTF8STRING_sk_type(sk), (idx), ossl_check_ASN1_UTF8STRING_type(ptr))) +#define sk_ASN1_UTF8STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr), pnum) +#define sk_ASN1_UTF8STRING_sort(sk) 
OPENSSL_sk_sort(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_dup(sk) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_copyfunc_type(copyfunc), ossl_check_ASN1_UTF8STRING_freefunc_type(freefunc))) +#define sk_ASN1_UTF8STRING_set_cmp_func(sk, cmp) ((sk_ASN1_UTF8STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS(ASN1_VISIBLESTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UNIVERSALSTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UTF8STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_NULL) +DECLARE_ASN1_FUNCTIONS(ASN1_BMPSTRING) + +int UTF8_getc(const unsigned char *str, int len, unsigned long *val); +int UTF8_putc(unsigned char *str, int len, unsigned long value); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_GENERALSTRING, ASN1_GENERALSTRING, ASN1_GENERALSTRING) +#define sk_ASN1_GENERALSTRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_value(sk, idx) ((ASN1_GENERALSTRING *)OPENSSL_sk_value(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk), (idx))) +#define sk_ASN1_GENERALSTRING_new(cmp) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new(ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp))) +#define sk_ASN1_GENERALSTRING_new_null() ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_GENERALSTRING_new_reserve(cmp, n) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp), (n))) +#define sk_ASN1_GENERALSTRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (n)) +#define sk_ASN1_GENERALSTRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_delete(sk, i) ((ASN1_GENERALSTRING *)OPENSSL_sk_delete(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (i))) +#define sk_ASN1_GENERALSTRING_delete_ptr(sk, ptr) ((ASN1_GENERALSTRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr))) +#define sk_ASN1_GENERALSTRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_pop(sk) ((ASN1_GENERALSTRING *)OPENSSL_sk_pop(ossl_check_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_shift(sk) ((ASN1_GENERALSTRING *)OPENSSL_sk_shift(ossl_check_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_GENERALSTRING_sk_type(sk),ossl_check_ASN1_GENERALSTRING_freefunc_type(freefunc)) +#define sk_ASN1_GENERALSTRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr), (idx)) +#define sk_ASN1_GENERALSTRING_set(sk, idx, ptr) ((ASN1_GENERALSTRING *)OPENSSL_sk_set(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (idx), ossl_check_ASN1_GENERALSTRING_type(ptr))) +#define 
sk_ASN1_GENERALSTRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr), pnum) +#define sk_ASN1_GENERALSTRING_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_dup(sk) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_copyfunc_type(copyfunc), ossl_check_ASN1_GENERALSTRING_freefunc_type(freefunc))) +#define sk_ASN1_GENERALSTRING_set_cmp_func(sk, cmp) ((sk_ASN1_GENERALSTRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, ASN1_PRINTABLE) + +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, DIRECTORYSTRING) +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, DISPLAYTEXT) +DECLARE_ASN1_FUNCTIONS(ASN1_PRINTABLESTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_T61STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_IA5STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_GENERALSTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UTCTIME) +DECLARE_ASN1_FUNCTIONS(ASN1_GENERALIZEDTIME) +DECLARE_ASN1_FUNCTIONS(ASN1_TIME) + +DECLARE_ASN1_DUP_FUNCTION(ASN1_TIME) +DECLARE_ASN1_DUP_FUNCTION(ASN1_UTCTIME) +DECLARE_ASN1_DUP_FUNCTION(ASN1_GENERALIZEDTIME) + +DECLARE_ASN1_ITEM(ASN1_OCTET_STRING_NDEF) + +ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s, time_t t); +ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s, time_t t, + int offset_day, long offset_sec); +int ASN1_TIME_check(const ASN1_TIME *t); +ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(const ASN1_TIME *t, + ASN1_GENERALIZEDTIME **out); +int ASN1_TIME_set_string(ASN1_TIME *s, const char *str); +int ASN1_TIME_set_string_X509(ASN1_TIME *s, const char *str); +int ASN1_TIME_to_tm(const ASN1_TIME *s, struct tm *tm); +int ASN1_TIME_normalize(ASN1_TIME *s); +int ASN1_TIME_cmp_time_t(const ASN1_TIME *s, time_t t); +int ASN1_TIME_compare(const ASN1_TIME *a, const ASN1_TIME *b); + +int i2a_ASN1_INTEGER(BIO *bp, const ASN1_INTEGER *a); +int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size); +int i2a_ASN1_ENUMERATED(BIO *bp, const ASN1_ENUMERATED *a); +int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size); +int i2a_ASN1_OBJECT(BIO *bp, const ASN1_OBJECT *a); +int a2i_ASN1_STRING(BIO *bp, ASN1_STRING *bs, char *buf, int size); +int i2a_ASN1_STRING(BIO *bp, const ASN1_STRING *a, int type); +int i2t_ASN1_OBJECT(char *buf, int buf_len, const ASN1_OBJECT *a); + +int a2d_ASN1_OBJECT(unsigned char *out, int olen, const char *buf, int num); +ASN1_OBJECT *ASN1_OBJECT_create(int nid, unsigned char *data, int len, + const char *sn, const char *ln); + +int ASN1_INTEGER_get_int64(int64_t *pr, const ASN1_INTEGER *a); +int ASN1_INTEGER_set_int64(ASN1_INTEGER *a, int64_t r); +int ASN1_INTEGER_get_uint64(uint64_t *pr, const ASN1_INTEGER *a); +int ASN1_INTEGER_set_uint64(ASN1_INTEGER *a, uint64_t r); + +int ASN1_INTEGER_set(ASN1_INTEGER *a, long 
v); +long ASN1_INTEGER_get(const ASN1_INTEGER *a); +ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn, ASN1_INTEGER *ai); +BIGNUM *ASN1_INTEGER_to_BN(const ASN1_INTEGER *ai, BIGNUM *bn); + +int ASN1_ENUMERATED_get_int64(int64_t *pr, const ASN1_ENUMERATED *a); +int ASN1_ENUMERATED_set_int64(ASN1_ENUMERATED *a, int64_t r); + + +int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v); +long ASN1_ENUMERATED_get(const ASN1_ENUMERATED *a); +ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn, ASN1_ENUMERATED *ai); +BIGNUM *ASN1_ENUMERATED_to_BN(const ASN1_ENUMERATED *ai, BIGNUM *bn); + +/* General */ +/* given a string, return the correct type, max is the maximum length */ +int ASN1_PRINTABLE_type(const unsigned char *s, int max); + +unsigned long ASN1_tag2bit(int tag); + +/* SPECIALS */ +int ASN1_get_object(const unsigned char **pp, long *plength, int *ptag, + int *pclass, long omax); +int ASN1_check_infinite_end(unsigned char **p, long len); +int ASN1_const_check_infinite_end(const unsigned char **p, long len); +void ASN1_put_object(unsigned char **pp, int constructed, int length, + int tag, int xclass); +int ASN1_put_eoc(unsigned char **pp); +int ASN1_object_size(int constructed, int length, int tag); + +/* Used to implement other functions */ +void *ASN1_dup(i2d_of_void *i2d, d2i_of_void *d2i, const void *x); + +# define ASN1_dup_of(type,i2d,d2i,x) \ + ((type*)ASN1_dup(CHECKED_I2D_OF(type, i2d), \ + CHECKED_D2I_OF(type, d2i), \ + CHECKED_PTR_OF(const type, x))) + +void *ASN1_item_dup(const ASN1_ITEM *it, const void *x); +int ASN1_item_sign_ex(const ASN1_ITEM *it, X509_ALGOR *algor1, + X509_ALGOR *algor2, ASN1_BIT_STRING *signature, + const void *data, const ASN1_OCTET_STRING *id, + EVP_PKEY *pkey, const EVP_MD *md, OSSL_LIB_CTX *libctx, + const char *propq); +int ASN1_item_verify_ex(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + const ASN1_OCTET_STRING *id, EVP_PKEY *pkey, + OSSL_LIB_CTX *libctx, const char *propq); + +/* ASN1 alloc/free macros for when a type is only used internally */ + +# define M_ASN1_new_of(type) (type *)ASN1_item_new(ASN1_ITEM_rptr(type)) +# define M_ASN1_free_of(x, type) \ + ASN1_item_free(CHECKED_PTR_OF(type, x), ASN1_ITEM_rptr(type)) + +# ifndef OPENSSL_NO_STDIO +void *ASN1_d2i_fp(void *(*xnew) (void), d2i_of_void *d2i, FILE *in, void **x); + +# define ASN1_d2i_fp_of(type,xnew,d2i,in,x) \ + ((type*)ASN1_d2i_fp(CHECKED_NEW_OF(type, xnew), \ + CHECKED_D2I_OF(type, d2i), \ + in, \ + CHECKED_PPTR_OF(type, x))) + +void *ASN1_item_d2i_fp_ex(const ASN1_ITEM *it, FILE *in, void *x, + OSSL_LIB_CTX *libctx, const char *propq); +void *ASN1_item_d2i_fp(const ASN1_ITEM *it, FILE *in, void *x); +int ASN1_i2d_fp(i2d_of_void *i2d, FILE *out, const void *x); + +# define ASN1_i2d_fp_of(type,i2d,out,x) \ + (ASN1_i2d_fp(CHECKED_I2D_OF(type, i2d), \ + out, \ + CHECKED_PTR_OF(const type, x))) + +int ASN1_item_i2d_fp(const ASN1_ITEM *it, FILE *out, const void *x); +int ASN1_STRING_print_ex_fp(FILE *fp, const ASN1_STRING *str, unsigned long flags); +# endif + +int ASN1_STRING_to_UTF8(unsigned char **out, const ASN1_STRING *in); + +void *ASN1_d2i_bio(void *(*xnew) (void), d2i_of_void *d2i, BIO *in, void **x); + +# define ASN1_d2i_bio_of(type,xnew,d2i,in,x) \ + ((type*)ASN1_d2i_bio( CHECKED_NEW_OF(type, xnew), \ + CHECKED_D2I_OF(type, d2i), \ + in, \ + CHECKED_PPTR_OF(type, x))) + +void *ASN1_item_d2i_bio_ex(const ASN1_ITEM *it, BIO *in, void *pval, + OSSL_LIB_CTX *libctx, const char *propq); +void *ASN1_item_d2i_bio(const ASN1_ITEM *it, 
BIO *in, void *pval); +int ASN1_i2d_bio(i2d_of_void *i2d, BIO *out, const void *x); + +# define ASN1_i2d_bio_of(type,i2d,out,x) \ + (ASN1_i2d_bio(CHECKED_I2D_OF(type, i2d), \ + out, \ + CHECKED_PTR_OF(const type, x))) + +int ASN1_item_i2d_bio(const ASN1_ITEM *it, BIO *out, const void *x); +BIO *ASN1_item_i2d_mem_bio(const ASN1_ITEM *it, const ASN1_VALUE *val); +int ASN1_UTCTIME_print(BIO *fp, const ASN1_UTCTIME *a); +int ASN1_GENERALIZEDTIME_print(BIO *fp, const ASN1_GENERALIZEDTIME *a); +int ASN1_TIME_print(BIO *bp, const ASN1_TIME *tm); +int ASN1_TIME_print_ex(BIO *bp, const ASN1_TIME *tm, unsigned long flags); +int ASN1_STRING_print(BIO *bp, const ASN1_STRING *v); +int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags); +int ASN1_buf_print(BIO *bp, const unsigned char *buf, size_t buflen, int off); +int ASN1_bn_print(BIO *bp, const char *number, const BIGNUM *num, + unsigned char *buf, int off); +int ASN1_parse(BIO *bp, const unsigned char *pp, long len, int indent); +int ASN1_parse_dump(BIO *bp, const unsigned char *pp, long len, int indent, + int dump); +const char *ASN1_tag2str(int tag); + +/* Used to load and write Netscape format cert */ + +int ASN1_UNIVERSALSTRING_to_string(ASN1_UNIVERSALSTRING *s); + +int ASN1_TYPE_set_octetstring(ASN1_TYPE *a, unsigned char *data, int len); +int ASN1_TYPE_get_octetstring(const ASN1_TYPE *a, unsigned char *data, int max_len); +int ASN1_TYPE_set_int_octetstring(ASN1_TYPE *a, long num, + unsigned char *data, int len); +int ASN1_TYPE_get_int_octetstring(const ASN1_TYPE *a, long *num, + unsigned char *data, int max_len); + +void *ASN1_item_unpack(const ASN1_STRING *oct, const ASN1_ITEM *it); +void *ASN1_item_unpack_ex(const ASN1_STRING *oct, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); + +ASN1_STRING *ASN1_item_pack(void *obj, const ASN1_ITEM *it, + ASN1_OCTET_STRING **oct); + +void ASN1_STRING_set_default_mask(unsigned long mask); +int ASN1_STRING_set_default_mask_asc(const char *p); +unsigned long ASN1_STRING_get_default_mask(void); +int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len, + int inform, unsigned long mask); +int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, + int inform, unsigned long mask, + long minsize, long maxsize); + +ASN1_STRING *ASN1_STRING_set_by_NID(ASN1_STRING **out, + const unsigned char *in, int inlen, + int inform, int nid); +ASN1_STRING_TABLE *ASN1_STRING_TABLE_get(int nid); +int ASN1_STRING_TABLE_add(int, long, long, unsigned long, unsigned long); +void ASN1_STRING_TABLE_cleanup(void); + +/* ASN1 template functions */ + +/* Old API compatible functions */ +ASN1_VALUE *ASN1_item_new(const ASN1_ITEM *it); +ASN1_VALUE *ASN1_item_new_ex(const ASN1_ITEM *it, OSSL_LIB_CTX *libctx, + const char *propq); +void ASN1_item_free(ASN1_VALUE *val, const ASN1_ITEM *it); +ASN1_VALUE *ASN1_item_d2i_ex(ASN1_VALUE **val, const unsigned char **in, + long len, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +ASN1_VALUE *ASN1_item_d2i(ASN1_VALUE **val, const unsigned char **in, + long len, const ASN1_ITEM *it); +int ASN1_item_i2d(const ASN1_VALUE *val, unsigned char **out, const ASN1_ITEM *it); +int ASN1_item_ndef_i2d(const ASN1_VALUE *val, unsigned char **out, + const ASN1_ITEM *it); + +void ASN1_add_oid_module(void); +void ASN1_add_stable_module(void); + +ASN1_TYPE *ASN1_generate_nconf(const char *str, CONF *nconf); +ASN1_TYPE *ASN1_generate_v3(const char *str, X509V3_CTX *cnf); +int ASN1_str2mask(const char *str, unsigned long 
*pmask); + +/* ASN1 Print flags */ + +/* Indicate missing OPTIONAL fields */ +# define ASN1_PCTX_FLAGS_SHOW_ABSENT 0x001 +/* Mark start and end of SEQUENCE */ +# define ASN1_PCTX_FLAGS_SHOW_SEQUENCE 0x002 +/* Mark start and end of SEQUENCE/SET OF */ +# define ASN1_PCTX_FLAGS_SHOW_SSOF 0x004 +/* Show the ASN1 type of primitives */ +# define ASN1_PCTX_FLAGS_SHOW_TYPE 0x008 +/* Don't show ASN1 type of ANY */ +# define ASN1_PCTX_FLAGS_NO_ANY_TYPE 0x010 +/* Don't show ASN1 type of MSTRINGs */ +# define ASN1_PCTX_FLAGS_NO_MSTRING_TYPE 0x020 +/* Don't show field names in SEQUENCE */ +# define ASN1_PCTX_FLAGS_NO_FIELD_NAME 0x040 +/* Show structure names of each SEQUENCE field */ +# define ASN1_PCTX_FLAGS_SHOW_FIELD_STRUCT_NAME 0x080 +/* Don't show structure name even at top level */ +# define ASN1_PCTX_FLAGS_NO_STRUCT_NAME 0x100 + +int ASN1_item_print(BIO *out, const ASN1_VALUE *ifld, int indent, + const ASN1_ITEM *it, const ASN1_PCTX *pctx); +ASN1_PCTX *ASN1_PCTX_new(void); +void ASN1_PCTX_free(ASN1_PCTX *p); +unsigned long ASN1_PCTX_get_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_nm_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_nm_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_cert_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_cert_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_oid_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_oid_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_str_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_str_flags(ASN1_PCTX *p, unsigned long flags); + +ASN1_SCTX *ASN1_SCTX_new(int (*scan_cb) (ASN1_SCTX *ctx)); +void ASN1_SCTX_free(ASN1_SCTX *p); +const ASN1_ITEM *ASN1_SCTX_get_item(ASN1_SCTX *p); +const ASN1_TEMPLATE *ASN1_SCTX_get_template(ASN1_SCTX *p); +unsigned long ASN1_SCTX_get_flags(ASN1_SCTX *p); +void ASN1_SCTX_set_app_data(ASN1_SCTX *p, void *data); +void *ASN1_SCTX_get_app_data(ASN1_SCTX *p); + +const BIO_METHOD *BIO_f_asn1(void); + +/* cannot constify val because of CMS_stream() */ +BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it); + +int i2d_ASN1_bio_stream(BIO *out, ASN1_VALUE *val, BIO *in, int flags, + const ASN1_ITEM *it); +int PEM_write_bio_ASN1_stream(BIO *out, ASN1_VALUE *val, BIO *in, int flags, + const char *hdr, const ASN1_ITEM *it); +/* cannot constify val because of CMS_dataFinal() */ +int SMIME_write_ASN1(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, + int ctype_nid, int econt_nid, + STACK_OF(X509_ALGOR) *mdalgs, const ASN1_ITEM *it); +int SMIME_write_ASN1_ex(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, + int ctype_nid, int econt_nid, + STACK_OF(X509_ALGOR) *mdalgs, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +ASN1_VALUE *SMIME_read_ASN1(BIO *bio, BIO **bcont, const ASN1_ITEM *it); +ASN1_VALUE *SMIME_read_ASN1_ex(BIO *bio, int flags, BIO **bcont, + const ASN1_ITEM *it, ASN1_VALUE **x, + OSSL_LIB_CTX *libctx, const char *propq); +int SMIME_crlf_copy(BIO *in, BIO *out, int flags); +int SMIME_text(BIO *in, BIO *out); + +const ASN1_ITEM *ASN1_ITEM_lookup(const char *name); +const ASN1_ITEM *ASN1_ITEM_get(size_t i); + +/* Legacy compatibility */ +# define DECLARE_ASN1_FUNCTIONS_fname(type, itname, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name) +# define DECLARE_ASN1_FUNCTIONS_const(type) DECLARE_ASN1_FUNCTIONS(type) +# define DECLARE_ASN1_ENCODE_FUNCTIONS_const(type, name) \ + 
DECLARE_ASN1_ENCODE_FUNCTIONS(type, name) +# define I2D_OF_const(type) I2D_OF(type) +# define ASN1_dup_of_const(type,i2d,d2i,x) ASN1_dup_of(type,i2d,d2i,x) +# define ASN1_i2d_fp_of_const(type,i2d,out,x) ASN1_i2d_fp_of(type,i2d,out,x) +# define ASN1_i2d_bio_of_const(type,i2d,out,x) ASN1_i2d_bio_of(type,i2d,out,x) + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1t.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1t.h new file mode 100644 index 00000000000..74ba47d0cf2 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1t.h @@ -0,0 +1,946 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/asn1t.h.in + * + * Copyright 2000-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ASN1T_H +# define OPENSSL_ASN1T_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_ASN1T_H +# endif + +# include +# include +# include + +# ifdef OPENSSL_BUILD_SHLIBCRYPTO +# undef OPENSSL_EXTERN +# define OPENSSL_EXTERN OPENSSL_EXPORT +# endif + +/* ASN1 template defines, structures and functions */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*- + * These are the possible values for the itype field of the + * ASN1_ITEM structure and determine how it is interpreted. + * + * For PRIMITIVE types the underlying type + * determines the behaviour if items is NULL. + * + * Otherwise templates must contain a single + * template and the type is treated in the + * same way as the type specified in the template. + * + * For SEQUENCE types the templates field points + * to the members, the size field is the + * structure size. + * + * For CHOICE types the templates field points + * to each possible member (typically a union) + * and the 'size' field is the offset of the + * selector. + * + * The 'funcs' field is used for application-specific + * data and functions. + * + * The EXTERN type uses a new style d2i/i2d. + * The new style should be used where possible + * because it avoids things like the d2i IMPLICIT + * hack. + * + * MSTRING is a multiple string type, it is used + * for a CHOICE of character strings where the + * actual strings all occupy an ASN1_STRING + * structure. In this case the 'utype' field + * has a special meaning, it is used as a mask + * of acceptable types using the B_ASN1 constants. + * + * NDEF_SEQUENCE is the same as SEQUENCE except + * that it will use indefinite length constructed + * encoding if requested. 
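[Editorial aside, not part of the vendored header] The MSTRING item type described above is how OpenSSL models types such as ASN1_TIME, which accept several underlying string/time encodings in a single ASN1_STRING. A rough illustration of how such an item is declared, using the IMPLEMENT_ASN1_MSTRING macro defined further down in this header (the exact mask OpenSSL uses for ASN1_TIME is paraphrased here from the B_ASN1 constants, so treat it as a sketch):

/* Illustrative sketch only: an MSTRING item whose 'utype' is a mask of
 * acceptable B_ASN1 string types, as the comment above explains. */
IMPLEMENT_ASN1_MSTRING(ASN1_TIME, B_ASN1_UTCTIME | B_ASN1_GENERALIZEDTIME)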
+ * + */ + +# define ASN1_ITYPE_PRIMITIVE 0x0 +# define ASN1_ITYPE_SEQUENCE 0x1 +# define ASN1_ITYPE_CHOICE 0x2 +/* unused value 0x3 */ +# define ASN1_ITYPE_EXTERN 0x4 +# define ASN1_ITYPE_MSTRING 0x5 +# define ASN1_ITYPE_NDEF_SEQUENCE 0x6 + +/* Macro to obtain ASN1_ADB pointer from a type (only used internally) */ +# define ASN1_ADB_ptr(iptr) ((const ASN1_ADB *)((iptr)())) + +/* Macros for start and end of ASN1_ITEM definition */ + +# define ASN1_ITEM_start(itname) \ + const ASN1_ITEM * itname##_it(void) \ + { \ + static const ASN1_ITEM local_it = { + +# define static_ASN1_ITEM_start(itname) \ + static ASN1_ITEM_start(itname) + +# define ASN1_ITEM_end(itname) \ + }; \ + return &local_it; \ + } + +/* Macros to aid ASN1 template writing */ + +# define ASN1_ITEM_TEMPLATE(tname) \ + static const ASN1_TEMPLATE tname##_item_tt + +# define ASN1_ITEM_TEMPLATE_END(tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_PRIMITIVE,\ + -1,\ + &tname##_item_tt,\ + 0,\ + NULL,\ + 0,\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_ITEM_TEMPLATE_END(tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_PRIMITIVE,\ + -1,\ + &tname##_item_tt,\ + 0,\ + NULL,\ + 0,\ + #tname \ + ASN1_ITEM_end(tname) + +/* This is a ASN1 type which just embeds a template */ + +/*- + * This pair helps declare a SEQUENCE. We can do: + * + * ASN1_SEQUENCE(stname) = { + * ... SEQUENCE components ... + * } ASN1_SEQUENCE_END(stname) + * + * This will produce an ASN1_ITEM called stname_it + * for a structure called stname. + * + * If you want the same structure but a different + * name then use: + * + * ASN1_SEQUENCE(itname) = { + * ... SEQUENCE components ... + * } ASN1_SEQUENCE_END_name(stname, itname) + * + * This will create an item called itname_it using + * a structure called stname. 
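[Editorial aside, not part of the vendored header] A minimal sketch of the SEQUENCE pattern the comment above describes. The struct MY_MESSAGE and its fields are hypothetical; the field macros (ASN1_SIMPLE, ASN1_OPT) and IMPLEMENT_ASN1_FUNCTIONS are defined later in this header:

/* Illustrative sketch only: a hypothetical SEQUENCE with one mandatory and
 * one OPTIONAL member. */
typedef struct my_message_st {
    ASN1_INTEGER *version;
    ASN1_UTF8STRING *subject;          /* OPTIONAL */
} MY_MESSAGE;

ASN1_SEQUENCE(MY_MESSAGE) = {
    ASN1_SIMPLE(MY_MESSAGE, version, ASN1_INTEGER),
    ASN1_OPT(MY_MESSAGE, subject, ASN1_UTF8STRING)
} ASN1_SEQUENCE_END(MY_MESSAGE)

/* Generates MY_MESSAGE_new/MY_MESSAGE_free and d2i_/i2d_MY_MESSAGE */
IMPLEMENT_ASN1_FUNCTIONS(MY_MESSAGE)

This produces an ASN1_ITEM called MY_MESSAGE_it, as described in the comment above.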
+ */ + +# define ASN1_SEQUENCE(tname) \ + static const ASN1_TEMPLATE tname##_seq_tt[] + +# define ASN1_SEQUENCE_END(stname) ASN1_SEQUENCE_END_name(stname, stname) + +# define static_ASN1_SEQUENCE_END(stname) static_ASN1_SEQUENCE_END_name(stname, stname) + +# define ASN1_SEQUENCE_END_name(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #tname \ + ASN1_ITEM_end(tname) + +# define static_ASN1_SEQUENCE_END_name(stname, tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_NDEF_SEQUENCE(tname) \ + ASN1_SEQUENCE(tname) + +# define ASN1_NDEF_SEQUENCE_cb(tname, cb) \ + ASN1_SEQUENCE_cb(tname, cb) + +# define ASN1_SEQUENCE_cb(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, 0, 0, 0, cb, 0, NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_const_cb(tname, const_cb) \ + static const ASN1_AUX tname##_aux = \ + {NULL, ASN1_AFLG_CONST_CB, 0, 0, NULL, 0, const_cb}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_cb_const_cb(tname, cb, const_cb) \ + static const ASN1_AUX tname##_aux = \ + {NULL, ASN1_AFLG_CONST_CB, 0, 0, cb, 0, const_cb}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_ref(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_REFCOUNT, offsetof(tname, references), offsetof(tname, lock), cb, 0, NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_enc(tname, enc, cb) \ + static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_ENCODING, 0, 0, cb, offsetof(tname, enc), NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_NDEF_SEQUENCE_END(tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(tname),\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_NDEF_SEQUENCE_END(tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(tname),\ + #tname \ + ASN1_ITEM_end(tname) + + +# define ASN1_SEQUENCE_END_enc(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname) + +# define ASN1_SEQUENCE_END_cb(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname) +# define static_ASN1_SEQUENCE_END_cb(stname, tname) static_ASN1_SEQUENCE_END_ref(stname, tname) + +# define ASN1_SEQUENCE_END_ref(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_SEQUENCE_END_ref(stname, tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_NDEF_SEQUENCE_END_cb(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +/*- + * This pair helps declare a CHOICE type. We can do: + * + * ASN1_CHOICE(chname) = { + * ... CHOICE options ... 
+ * ASN1_CHOICE_END(chname) + * + * This will produce an ASN1_ITEM called chname_it + * for a structure called chname. The structure + * definition must look like this: + * typedef struct { + * int type; + * union { + * ASN1_SOMETHING *opt1; + * ASN1_SOMEOTHER *opt2; + * } value; + * } chname; + * + * the name of the selector must be 'type'. + * to use an alternative selector name use the + * ASN1_CHOICE_END_selector() version. + */ + +# define ASN1_CHOICE(tname) \ + static const ASN1_TEMPLATE tname##_ch_tt[] + +# define ASN1_CHOICE_cb(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, 0, 0, 0, cb, 0, NULL}; \ + ASN1_CHOICE(tname) + +# define ASN1_CHOICE_END(stname) ASN1_CHOICE_END_name(stname, stname) + +# define static_ASN1_CHOICE_END(stname) static_ASN1_CHOICE_END_name(stname, stname) + +# define ASN1_CHOICE_END_name(stname, tname) ASN1_CHOICE_END_selector(stname, tname, type) + +# define static_ASN1_CHOICE_END_name(stname, tname) static_ASN1_CHOICE_END_selector(stname, tname, type) + +# define ASN1_CHOICE_END_selector(stname, tname, selname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define static_ASN1_CHOICE_END_selector(stname, tname, selname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_CHOICE_END_cb(stname, tname, selname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +/* This helps with the template wrapper form of ASN1_ITEM */ + +# define ASN1_EX_TEMPLATE_TYPE(flags, tag, name, type) { \ + (flags), (tag), 0,\ + #name, ASN1_ITEM_ref(type) } + +/* These help with SEQUENCE or CHOICE components */ + +/* used to declare other types */ + +# define ASN1_EX_TYPE(flags, tag, stname, field, type) { \ + (flags), (tag), offsetof(stname, field),\ + #field, ASN1_ITEM_ref(type) } + +/* implicit and explicit helper macros */ + +# define ASN1_IMP_EX(stname, field, type, tag, ex) \ + ASN1_EX_TYPE(ASN1_TFLG_IMPLICIT | (ex), tag, stname, field, type) + +# define ASN1_EXP_EX(stname, field, type, tag, ex) \ + ASN1_EX_TYPE(ASN1_TFLG_EXPLICIT | (ex), tag, stname, field, type) + +/* Any defined by macros: the field used is in the table itself */ + +# define ASN1_ADB_OBJECT(tblname) { ASN1_TFLG_ADB_OID, -1, 0, #tblname, tblname##_adb } +# define ASN1_ADB_INTEGER(tblname) { ASN1_TFLG_ADB_INT, -1, 0, #tblname, tblname##_adb } + +/* Plain simple type */ +# define ASN1_SIMPLE(stname, field, type) ASN1_EX_TYPE(0,0, stname, field, type) +/* Embedded simple type */ +# define ASN1_EMBED(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_EMBED,0, stname, field, type) + +/* OPTIONAL simple type */ +# define ASN1_OPT(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_OPTIONAL, 0, stname, field, type) +# define ASN1_OPT_EMBED(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED, 0, stname, field, type) + +/* IMPLICIT tagged simple type */ +# define ASN1_IMP(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, 0) +# define ASN1_IMP_EMBED(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_EMBED) + +/* IMPLICIT tagged OPTIONAL 
simple type */ +# define ASN1_IMP_OPT(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL) +# define ASN1_IMP_OPT_EMBED(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED) + +/* Same as above but EXPLICIT */ + +# define ASN1_EXP(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, 0) +# define ASN1_EXP_EMBED(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_EMBED) +# define ASN1_EXP_OPT(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL) +# define ASN1_EXP_OPT_EMBED(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED) + +/* SEQUENCE OF type */ +# define ASN1_SEQUENCE_OF(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SEQUENCE_OF, 0, stname, field, type) + +/* OPTIONAL SEQUENCE OF */ +# define ASN1_SEQUENCE_OF_OPT(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL, 0, stname, field, type) + +/* Same as above but for SET OF */ + +# define ASN1_SET_OF(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SET_OF, 0, stname, field, type) + +# define ASN1_SET_OF_OPT(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL, 0, stname, field, type) + +/* Finally compound types of SEQUENCE, SET, IMPLICIT, EXPLICIT and OPTIONAL */ + +# define ASN1_IMP_SET_OF(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF) + +# define ASN1_EXP_SET_OF(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF) + +# define ASN1_IMP_SET_OF_OPT(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_EXP_SET_OF_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_IMP_SEQUENCE_OF(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF) + +# define ASN1_IMP_SEQUENCE_OF_OPT(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_EXP_SEQUENCE_OF(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF) + +# define ASN1_EXP_SEQUENCE_OF_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL) + +/* EXPLICIT using indefinite length constructed form */ +# define ASN1_NDEF_EXP(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_NDEF) + +/* EXPLICIT OPTIONAL using indefinite length constructed form */ +# define ASN1_NDEF_EXP_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_NDEF) + +/* Macros for the ASN1_ADB structure */ + +# define ASN1_ADB(name) \ + static const ASN1_ADB_TABLE name##_adbtbl[] + +# define ASN1_ADB_END(name, flags, field, adb_cb, def, none) \ + ;\ + static const ASN1_ITEM *name##_adb(void) \ + { \ + static const ASN1_ADB internal_adb = \ + {\ + flags,\ + offsetof(name, field),\ + adb_cb,\ + name##_adbtbl,\ + sizeof(name##_adbtbl) / sizeof(ASN1_ADB_TABLE),\ + def,\ + none\ + }; \ + return (const ASN1_ITEM *) &internal_adb; \ + } \ + void dummy_function(void) + +# define ADB_ENTRY(val, template) {val, template} + +# define ASN1_ADB_TEMPLATE(name) \ + static const ASN1_TEMPLATE name##_tt + +/* + * This is the ASN1 template structure that defines a wrapper round the + * actual type. 
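[Editorial aside, not part of the vendored header] Building on the field macros just defined, a sketch of how IMPLICIT/EXPLICIT tagging, OPTIONAL fields and SEQUENCE OF combine in a template. MY_REQUEST and its members are hypothetical:

/* Illustrative sketch only: a [0] IMPLICIT OPTIONAL field and a
 * [1] EXPLICIT SEQUENCE OF field in a hypothetical structure. */
typedef struct my_request_st {
    ASN1_INTEGER *id;
    ASN1_OCTET_STRING *nonce;            /* nonce  [0] IMPLICIT OCTET STRING OPTIONAL */
    STACK_OF(ASN1_UTF8STRING) *labels;   /* labels [1] EXPLICIT SEQUENCE OF UTF8String */
} MY_REQUEST;

ASN1_SEQUENCE(MY_REQUEST) = {
    ASN1_SIMPLE(MY_REQUEST, id, ASN1_INTEGER),
    ASN1_IMP_OPT(MY_REQUEST, nonce, ASN1_OCTET_STRING, 0),
    ASN1_EXP_SEQUENCE_OF(MY_REQUEST, labels, ASN1_UTF8STRING, 1)
} ASN1_SEQUENCE_END(MY_REQUEST)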
It determines the actual position of the field in the value + * structure, various flags such as OPTIONAL and the field name. + */ + +struct ASN1_TEMPLATE_st { + unsigned long flags; /* Various flags */ + long tag; /* tag, not used if no tagging */ + unsigned long offset; /* Offset of this field in structure */ + const char *field_name; /* Field name */ + ASN1_ITEM_EXP *item; /* Relevant ASN1_ITEM or ASN1_ADB */ +}; + +/* Macro to extract ASN1_ITEM and ASN1_ADB pointer from ASN1_TEMPLATE */ + +# define ASN1_TEMPLATE_item(t) (t->item_ptr) +# define ASN1_TEMPLATE_adb(t) (t->item_ptr) + +typedef struct ASN1_ADB_TABLE_st ASN1_ADB_TABLE; +typedef struct ASN1_ADB_st ASN1_ADB; + +struct ASN1_ADB_st { + unsigned long flags; /* Various flags */ + unsigned long offset; /* Offset of selector field */ + int (*adb_cb)(long *psel); /* Application callback */ + const ASN1_ADB_TABLE *tbl; /* Table of possible types */ + long tblcount; /* Number of entries in tbl */ + const ASN1_TEMPLATE *default_tt; /* Type to use if no match */ + const ASN1_TEMPLATE *null_tt; /* Type to use if selector is NULL */ +}; + +struct ASN1_ADB_TABLE_st { + long value; /* NID for an object or value for an int */ + const ASN1_TEMPLATE tt; /* item for this value */ +}; + +/* template flags */ + +/* Field is optional */ +# define ASN1_TFLG_OPTIONAL (0x1) + +/* Field is a SET OF */ +# define ASN1_TFLG_SET_OF (0x1 << 1) + +/* Field is a SEQUENCE OF */ +# define ASN1_TFLG_SEQUENCE_OF (0x2 << 1) + +/* + * Special case: this refers to a SET OF that will be sorted into DER order + * when encoded *and* the corresponding STACK will be modified to match the + * new order. + */ +# define ASN1_TFLG_SET_ORDER (0x3 << 1) + +/* Mask for SET OF or SEQUENCE OF */ +# define ASN1_TFLG_SK_MASK (0x3 << 1) + +/* + * These flags mean the tag should be taken from the tag field. If EXPLICIT + * then the underlying type is used for the inner tag. + */ + +/* IMPLICIT tagging */ +# define ASN1_TFLG_IMPTAG (0x1 << 3) + +/* EXPLICIT tagging, inner tag from underlying type */ +# define ASN1_TFLG_EXPTAG (0x2 << 3) + +# define ASN1_TFLG_TAG_MASK (0x3 << 3) + +/* context specific IMPLICIT */ +# define ASN1_TFLG_IMPLICIT (ASN1_TFLG_IMPTAG|ASN1_TFLG_CONTEXT) + +/* context specific EXPLICIT */ +# define ASN1_TFLG_EXPLICIT (ASN1_TFLG_EXPTAG|ASN1_TFLG_CONTEXT) + +/* + * If tagging is in force these determine the type of tag to use. Otherwise + * the tag is determined by the underlying type. These values reflect the + * actual octet format. + */ + +/* Universal tag */ +# define ASN1_TFLG_UNIVERSAL (0x0<<6) +/* Application tag */ +# define ASN1_TFLG_APPLICATION (0x1<<6) +/* Context specific tag */ +# define ASN1_TFLG_CONTEXT (0x2<<6) +/* Private tag */ +# define ASN1_TFLG_PRIVATE (0x3<<6) + +# define ASN1_TFLG_TAG_CLASS (0x3<<6) + +/* + * These are for ANY DEFINED BY type. In this case the 'item' field points to + * an ASN1_ADB structure which contains a table of values to decode the + * relevant type + */ + +# define ASN1_TFLG_ADB_MASK (0x3<<8) + +# define ASN1_TFLG_ADB_OID (0x1<<8) + +# define ASN1_TFLG_ADB_INT (0x1<<9) + +/* + * This flag when present in a SEQUENCE OF, SET OF or EXPLICIT causes + * indefinite length constructed encoding to be used if required. 
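[Editorial aside, not part of the vendored header] For reference, the convenience macros earlier in the header are just pre-combined template flags; the equivalences below are derived from the definitions shown above and are listed only for illustration:

/* Equivalences implied by the macro definitions above (illustration only):
 *
 *   ASN1_IMP_OPT(stname, field, type, tag)
 *     == ASN1_EX_TYPE(ASN1_TFLG_IMPLICIT | ASN1_TFLG_OPTIONAL, tag, stname, field, type)
 *
 *   ASN1_TFLG_IMPLICIT == ASN1_TFLG_IMPTAG | ASN1_TFLG_CONTEXT
 *   ASN1_TFLG_EXPLICIT == ASN1_TFLG_EXPTAG | ASN1_TFLG_CONTEXT
 */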
+ */ + +# define ASN1_TFLG_NDEF (0x1<<11) + +/* Field is embedded and not a pointer */ +# define ASN1_TFLG_EMBED (0x1 << 12) + +/* This is the actual ASN1 item itself */ + +struct ASN1_ITEM_st { + char itype; /* The item type, primitive, SEQUENCE, CHOICE + * or extern */ + long utype; /* underlying type */ + const ASN1_TEMPLATE *templates; /* If SEQUENCE or CHOICE this contains + * the contents */ + long tcount; /* Number of templates if SEQUENCE or CHOICE */ + const void *funcs; /* further data and type-specific functions */ + /* funcs can be ASN1_PRIMITIVE_FUNCS*, ASN1_EXTERN_FUNCS*, or ASN1_AUX* */ + long size; /* Structure size (usually) */ + const char *sname; /* Structure name */ +}; + +/* + * Cache for ASN1 tag and length, so we don't keep re-reading it for things + * like CHOICE + */ + +struct ASN1_TLC_st { + char valid; /* Values below are valid */ + int ret; /* return value */ + long plen; /* length */ + int ptag; /* class value */ + int pclass; /* class value */ + int hdrlen; /* header length */ +}; + +/* Typedefs for ASN1 function pointers */ +typedef int ASN1_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx); + +typedef int ASN1_ex_d2i_ex(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx, OSSL_LIB_CTX *libctx, + const char *propq); +typedef int ASN1_ex_i2d(const ASN1_VALUE **pval, unsigned char **out, + const ASN1_ITEM *it, int tag, int aclass); +typedef int ASN1_ex_new_func(ASN1_VALUE **pval, const ASN1_ITEM *it); +typedef int ASN1_ex_new_ex_func(ASN1_VALUE **pval, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +typedef void ASN1_ex_free_func(ASN1_VALUE **pval, const ASN1_ITEM *it); + +typedef int ASN1_ex_print_func(BIO *out, const ASN1_VALUE **pval, + int indent, const char *fname, + const ASN1_PCTX *pctx); + +typedef int ASN1_primitive_i2c(const ASN1_VALUE **pval, unsigned char *cont, + int *putype, const ASN1_ITEM *it); +typedef int ASN1_primitive_c2i(ASN1_VALUE **pval, const unsigned char *cont, + int len, int utype, char *free_cont, + const ASN1_ITEM *it); +typedef int ASN1_primitive_print(BIO *out, const ASN1_VALUE **pval, + const ASN1_ITEM *it, int indent, + const ASN1_PCTX *pctx); + +typedef struct ASN1_EXTERN_FUNCS_st { + void *app_data; + ASN1_ex_new_func *asn1_ex_new; + ASN1_ex_free_func *asn1_ex_free; + ASN1_ex_free_func *asn1_ex_clear; + ASN1_ex_d2i *asn1_ex_d2i; + ASN1_ex_i2d *asn1_ex_i2d; + ASN1_ex_print_func *asn1_ex_print; + ASN1_ex_new_ex_func *asn1_ex_new_ex; + ASN1_ex_d2i_ex *asn1_ex_d2i_ex; +} ASN1_EXTERN_FUNCS; + +typedef struct ASN1_PRIMITIVE_FUNCS_st { + void *app_data; + unsigned long flags; + ASN1_ex_new_func *prim_new; + ASN1_ex_free_func *prim_free; + ASN1_ex_free_func *prim_clear; + ASN1_primitive_c2i *prim_c2i; + ASN1_primitive_i2c *prim_i2c; + ASN1_primitive_print *prim_print; +} ASN1_PRIMITIVE_FUNCS; + +/* + * This is the ASN1_AUX structure: it handles various miscellaneous + * requirements. For example the use of reference counts and an informational + * callback. The "informational callback" is called at various points during + * the ASN1 encoding and decoding. It can be used to provide minor + * customisation of the structures used. This is most useful where the + * supplied routines *almost* do the right thing but need some extra help at + * a few points. 
If the callback returns zero then it is assumed a fatal + * error has occurred and the main operation should be abandoned. If major + * changes in the default behaviour are required then an external type is + * more appropriate. + * For the operations ASN1_OP_I2D_PRE, ASN1_OP_I2D_POST, ASN1_OP_PRINT_PRE, and + * ASN1_OP_PRINT_POST, meanwhile a variant of the callback with const parameter + * 'in' is provided to make clear statically that its input is not modified. If + * and only if this variant is in use the flag ASN1_AFLG_CONST_CB must be set. + */ + +typedef int ASN1_aux_cb(int operation, ASN1_VALUE **in, const ASN1_ITEM *it, + void *exarg); +typedef int ASN1_aux_const_cb(int operation, const ASN1_VALUE **in, + const ASN1_ITEM *it, void *exarg); + +typedef struct ASN1_AUX_st { + void *app_data; + int flags; + int ref_offset; /* Offset of reference value */ + int ref_lock; /* Offset of lock value */ + ASN1_aux_cb *asn1_cb; + int enc_offset; /* Offset of ASN1_ENCODING structure */ + ASN1_aux_const_cb *asn1_const_cb; /* for ASN1_OP_I2D_ and ASN1_OP_PRINT_ */ +} ASN1_AUX; + +/* For print related callbacks exarg points to this structure */ +typedef struct ASN1_PRINT_ARG_st { + BIO *out; + int indent; + const ASN1_PCTX *pctx; +} ASN1_PRINT_ARG; + +/* For streaming related callbacks exarg points to this structure */ +typedef struct ASN1_STREAM_ARG_st { + /* BIO to stream through */ + BIO *out; + /* BIO with filters appended */ + BIO *ndef_bio; + /* Streaming I/O boundary */ + unsigned char **boundary; +} ASN1_STREAM_ARG; + +/* Flags in ASN1_AUX */ + +/* Use a reference count */ +# define ASN1_AFLG_REFCOUNT 1 +/* Save the encoding of structure (useful for signatures) */ +# define ASN1_AFLG_ENCODING 2 +/* The Sequence length is invalid */ +# define ASN1_AFLG_BROKEN 4 +/* Use the new asn1_const_cb */ +# define ASN1_AFLG_CONST_CB 8 + +/* operation values for asn1_cb */ + +# define ASN1_OP_NEW_PRE 0 +# define ASN1_OP_NEW_POST 1 +# define ASN1_OP_FREE_PRE 2 +# define ASN1_OP_FREE_POST 3 +# define ASN1_OP_D2I_PRE 4 +# define ASN1_OP_D2I_POST 5 +# define ASN1_OP_I2D_PRE 6 +# define ASN1_OP_I2D_POST 7 +# define ASN1_OP_PRINT_PRE 8 +# define ASN1_OP_PRINT_POST 9 +# define ASN1_OP_STREAM_PRE 10 +# define ASN1_OP_STREAM_POST 11 +# define ASN1_OP_DETACHED_PRE 12 +# define ASN1_OP_DETACHED_POST 13 +# define ASN1_OP_DUP_PRE 14 +# define ASN1_OP_DUP_POST 15 +# define ASN1_OP_GET0_LIBCTX 16 +# define ASN1_OP_GET0_PROPQ 17 + +/* Macro to implement a primitive type */ +# define IMPLEMENT_ASN1_TYPE(stname) IMPLEMENT_ASN1_TYPE_ex(stname, stname, 0) +# define IMPLEMENT_ASN1_TYPE_ex(itname, vname, ex) \ + ASN1_ITEM_start(itname) \ + ASN1_ITYPE_PRIMITIVE, V_##vname, NULL, 0, NULL, ex, #itname \ + ASN1_ITEM_end(itname) + +/* Macro to implement a multi string type */ +# define IMPLEMENT_ASN1_MSTRING(itname, mask) \ + ASN1_ITEM_start(itname) \ + ASN1_ITYPE_MSTRING, mask, NULL, 0, NULL, sizeof(ASN1_STRING), #itname \ + ASN1_ITEM_end(itname) + +# define IMPLEMENT_EXTERN_ASN1(sname, tag, fptrs) \ + ASN1_ITEM_start(sname) \ + ASN1_ITYPE_EXTERN, \ + tag, \ + NULL, \ + 0, \ + &fptrs, \ + 0, \ + #sname \ + ASN1_ITEM_end(sname) + +/* Macro to implement standard functions in terms of ASN1_ITEM structures */ + +# define IMPLEMENT_ASN1_FUNCTIONS(stname) IMPLEMENT_ASN1_FUNCTIONS_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_FUNCTIONS_name(stname, itname) IMPLEMENT_ASN1_FUNCTIONS_fname(stname, itname, itname) + +# define IMPLEMENT_ASN1_FUNCTIONS_ENCODE_name(stname, itname) \ + IMPLEMENT_ASN1_FUNCTIONS_ENCODE_fname(stname, 
itname, itname) + +# define IMPLEMENT_STATIC_ASN1_ALLOC_FUNCTIONS(stname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_pfname(static, stname, stname, stname) + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS(stname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS_pfname(pre, stname, itname, fname) \ + pre stname *fname##_new(void) \ + { \ + return (stname *)ASN1_item_new(ASN1_ITEM_rptr(itname)); \ + } \ + pre void fname##_free(stname *a) \ + { \ + ASN1_item_free((ASN1_VALUE *)a, ASN1_ITEM_rptr(itname)); \ + } + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, itname, fname) \ + stname *fname##_new(void) \ + { \ + return (stname *)ASN1_item_new(ASN1_ITEM_rptr(itname)); \ + } \ + void fname##_free(stname *a) \ + { \ + ASN1_item_free((ASN1_VALUE *)a, ASN1_ITEM_rptr(itname)); \ + } + +# define IMPLEMENT_ASN1_FUNCTIONS_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, itname, fname) + +# define IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) \ + stname *d2i_##fname(stname **a, const unsigned char **in, long len) \ + { \ + return (stname *)ASN1_item_d2i((ASN1_VALUE **)a, in, len, ASN1_ITEM_rptr(itname));\ + } \ + int i2d_##fname(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_i2d((const ASN1_VALUE *)a, out, ASN1_ITEM_rptr(itname));\ + } + +# define IMPLEMENT_ASN1_NDEF_FUNCTION(stname) \ + int i2d_##stname##_NDEF(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_ndef_i2d((const ASN1_VALUE *)a, out, ASN1_ITEM_rptr(stname));\ + } + +# define IMPLEMENT_STATIC_ASN1_ENCODE_FUNCTIONS(stname) \ + static stname *d2i_##stname(stname **a, \ + const unsigned char **in, long len) \ + { \ + return (stname *)ASN1_item_d2i((ASN1_VALUE **)a, in, len, \ + ASN1_ITEM_rptr(stname)); \ + } \ + static int i2d_##stname(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_i2d((const ASN1_VALUE *)a, out, \ + ASN1_ITEM_rptr(stname)); \ + } + +# define IMPLEMENT_ASN1_DUP_FUNCTION(stname) \ + stname * stname##_dup(const stname *x) \ + { \ + return ASN1_item_dup(ASN1_ITEM_rptr(stname), x); \ + } + +# define IMPLEMENT_ASN1_PRINT_FUNCTION(stname) \ + IMPLEMENT_ASN1_PRINT_FUNCTION_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_PRINT_FUNCTION_fname(stname, itname, fname) \ + int fname##_print_ctx(BIO *out, const stname *x, int indent, \ + const ASN1_PCTX *pctx) \ + { \ + return ASN1_item_print(out, (const ASN1_VALUE *)x, indent, \ + ASN1_ITEM_rptr(itname), pctx); \ + } + +/* external definitions for primitive types */ + +DECLARE_ASN1_ITEM(ASN1_BOOLEAN) +DECLARE_ASN1_ITEM(ASN1_TBOOLEAN) +DECLARE_ASN1_ITEM(ASN1_FBOOLEAN) +DECLARE_ASN1_ITEM(ASN1_SEQUENCE) +DECLARE_ASN1_ITEM(CBIGNUM) +DECLARE_ASN1_ITEM(BIGNUM) +DECLARE_ASN1_ITEM(INT32) +DECLARE_ASN1_ITEM(ZINT32) +DECLARE_ASN1_ITEM(UINT32) +DECLARE_ASN1_ITEM(ZUINT32) +DECLARE_ASN1_ITEM(INT64) +DECLARE_ASN1_ITEM(ZINT64) +DECLARE_ASN1_ITEM(UINT64) +DECLARE_ASN1_ITEM(ZUINT64) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* + * LONG and ZLONG are strongly discouraged for use as stored data, as the + * underlying C type (long) differs in size depending on the architecture. + * They are designed with 32-bit longs in mind. 
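[Editorial aside, not part of the vendored header] To make the effect of IMPLEMENT_ASN1_FUNCTIONS concrete, here is a rough usage sketch for the hypothetical MY_MESSAGE type from the earlier example. The generated i2d_*/d2i_* functions are thin wrappers over ASN1_item_i2d/ASN1_item_d2i, exactly as the macro bodies above show:

/* Illustrative sketch only: DER-encode a hypothetical MY_MESSAGE and decode
 * it again using the functions generated by IMPLEMENT_ASN1_FUNCTIONS. */
static int my_message_roundtrip(const MY_MESSAGE *in)
{
    unsigned char *der = NULL;
    const unsigned char *p;
    MY_MESSAGE *out;
    int len, ok;

    len = i2d_MY_MESSAGE(in, &der);        /* allocates and fills a DER buffer */
    if (len < 0)
        return 0;

    p = der;                               /* d2i advances this pointer */
    out = d2i_MY_MESSAGE(NULL, &p, len);
    ok = (out != NULL);

    MY_MESSAGE_free(out);
    OPENSSL_free(der);
    return ok;
}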
+ */ +DECLARE_ASN1_ITEM(LONG) +DECLARE_ASN1_ITEM(ZLONG) +# endif + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_VALUE, ASN1_VALUE, ASN1_VALUE) +#define sk_ASN1_VALUE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_value(sk, idx) ((ASN1_VALUE *)OPENSSL_sk_value(ossl_check_const_ASN1_VALUE_sk_type(sk), (idx))) +#define sk_ASN1_VALUE_new(cmp) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new(ossl_check_ASN1_VALUE_compfunc_type(cmp))) +#define sk_ASN1_VALUE_new_null() ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_VALUE_new_reserve(cmp, n) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_VALUE_compfunc_type(cmp), (n))) +#define sk_ASN1_VALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_VALUE_sk_type(sk), (n)) +#define sk_ASN1_VALUE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_delete(sk, i) ((ASN1_VALUE *)OPENSSL_sk_delete(ossl_check_ASN1_VALUE_sk_type(sk), (i))) +#define sk_ASN1_VALUE_delete_ptr(sk, ptr) ((ASN1_VALUE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr))) +#define sk_ASN1_VALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_pop(sk) ((ASN1_VALUE *)OPENSSL_sk_pop(ossl_check_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_shift(sk) ((ASN1_VALUE *)OPENSSL_sk_shift(ossl_check_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_VALUE_sk_type(sk),ossl_check_ASN1_VALUE_freefunc_type(freefunc)) +#define sk_ASN1_VALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr), (idx)) +#define sk_ASN1_VALUE_set(sk, idx, ptr) ((ASN1_VALUE *)OPENSSL_sk_set(ossl_check_ASN1_VALUE_sk_type(sk), (idx), ossl_check_ASN1_VALUE_type(ptr))) +#define sk_ASN1_VALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr), pnum) +#define sk_ASN1_VALUE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_dup(sk) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_copyfunc_type(copyfunc), ossl_check_ASN1_VALUE_freefunc_type(freefunc))) +#define sk_ASN1_VALUE_set_cmp_func(sk, cmp) ((sk_ASN1_VALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_compfunc_type(cmp))) + + + +/* Functions used internally by the ASN1 code */ + +int ASN1_item_ex_new(ASN1_VALUE **pval, const ASN1_ITEM *it); +void ASN1_item_ex_free(ASN1_VALUE **pval, const ASN1_ITEM *it); + +int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx); + +int ASN1_item_ex_i2d(const ASN1_VALUE 
**pval, unsigned char **out, + const ASN1_ITEM *it, int tag, int aclass); + +/* Legacy compatibility */ +# define IMPLEMENT_ASN1_FUNCTIONS_const(name) IMPLEMENT_ASN1_FUNCTIONS(name) +# define IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) + +#ifdef __cplusplus +} +#endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/bio.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/bio.h new file mode 100644 index 00000000000..ea584defe5c --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/bio.h @@ -0,0 +1,1010 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/bio.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#ifndef OPENSSL_BIO_H +# define OPENSSL_BIO_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_BIO_H +# endif + +# include + +# ifndef OPENSSL_NO_STDIO +# include +# endif +# include + +# include +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +/* There are the classes of BIOs */ +# define BIO_TYPE_DESCRIPTOR 0x0100 /* socket, fd, connect or accept */ +# define BIO_TYPE_FILTER 0x0200 +# define BIO_TYPE_SOURCE_SINK 0x0400 + +/* These are the 'types' of BIOs */ +# define BIO_TYPE_NONE 0 +# define BIO_TYPE_MEM ( 1|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_FILE ( 2|BIO_TYPE_SOURCE_SINK) + +# define BIO_TYPE_FD ( 4|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_SOCKET ( 5|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_NULL ( 6|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_SSL ( 7|BIO_TYPE_FILTER) +# define BIO_TYPE_MD ( 8|BIO_TYPE_FILTER) +# define BIO_TYPE_BUFFER ( 9|BIO_TYPE_FILTER) +# define BIO_TYPE_CIPHER (10|BIO_TYPE_FILTER) +# define BIO_TYPE_BASE64 (11|BIO_TYPE_FILTER) +# define BIO_TYPE_CONNECT (12|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_ACCEPT (13|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) + +# define BIO_TYPE_NBIO_TEST (16|BIO_TYPE_FILTER)/* server proxy BIO */ +# define BIO_TYPE_NULL_FILTER (17|BIO_TYPE_FILTER) +# define BIO_TYPE_BIO (19|BIO_TYPE_SOURCE_SINK)/* half a BIO pair */ +# define BIO_TYPE_LINEBUFFER (20|BIO_TYPE_FILTER) +# define BIO_TYPE_DGRAM (21|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_ASN1 (22|BIO_TYPE_FILTER) +# define BIO_TYPE_COMP (23|BIO_TYPE_FILTER) +# ifndef OPENSSL_NO_SCTP +# define BIO_TYPE_DGRAM_SCTP (24|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# endif +# define BIO_TYPE_CORE_TO_PROV (25|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_DGRAM_PAIR (26|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_DGRAM_MEM (27|BIO_TYPE_SOURCE_SINK) + +#define BIO_TYPE_START 128 + +/* + * BIO_FILENAME_READ|BIO_CLOSE to open or close on free. 
+ * BIO_set_fp(in,stdin,BIO_NOCLOSE); + */ +# define BIO_NOCLOSE 0x00 +# define BIO_CLOSE 0x01 + +/* + * These are used in the following macros and are passed to BIO_ctrl() + */ +# define BIO_CTRL_RESET 1/* opt - rewind/zero etc */ +# define BIO_CTRL_EOF 2/* opt - are we at the eof */ +# define BIO_CTRL_INFO 3/* opt - extra tit-bits */ +# define BIO_CTRL_SET 4/* man - set the 'IO' type */ +# define BIO_CTRL_GET 5/* man - get the 'IO' type */ +# define BIO_CTRL_PUSH 6/* opt - internal, used to signify change */ +# define BIO_CTRL_POP 7/* opt - internal, used to signify change */ +# define BIO_CTRL_GET_CLOSE 8/* man - set the 'close' on free */ +# define BIO_CTRL_SET_CLOSE 9/* man - set the 'close' on free */ +# define BIO_CTRL_PENDING 10/* opt - is their more data buffered */ +# define BIO_CTRL_FLUSH 11/* opt - 'flush' buffered output */ +# define BIO_CTRL_DUP 12/* man - extra stuff for 'duped' BIO */ +# define BIO_CTRL_WPENDING 13/* opt - number of bytes still to write */ +# define BIO_CTRL_SET_CALLBACK 14/* opt - set callback function */ +# define BIO_CTRL_GET_CALLBACK 15/* opt - set callback function */ + +# define BIO_CTRL_PEEK 29/* BIO_f_buffer special */ +# define BIO_CTRL_SET_FILENAME 30/* BIO_s_file special */ + +/* dgram BIO stuff */ +# define BIO_CTRL_DGRAM_CONNECT 31/* BIO dgram special */ +# define BIO_CTRL_DGRAM_SET_CONNECTED 32/* allow for an externally connected + * socket to be passed in */ +# define BIO_CTRL_DGRAM_SET_RECV_TIMEOUT 33/* setsockopt, essentially */ +# define BIO_CTRL_DGRAM_GET_RECV_TIMEOUT 34/* getsockopt, essentially */ +# define BIO_CTRL_DGRAM_SET_SEND_TIMEOUT 35/* setsockopt, essentially */ +# define BIO_CTRL_DGRAM_GET_SEND_TIMEOUT 36/* getsockopt, essentially */ + +# define BIO_CTRL_DGRAM_GET_RECV_TIMER_EXP 37/* flag whether the last */ +# define BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP 38/* I/O operation timed out */ + +/* #ifdef IP_MTU_DISCOVER */ +# define BIO_CTRL_DGRAM_MTU_DISCOVER 39/* set DF bit on egress packets */ +/* #endif */ + +# define BIO_CTRL_DGRAM_QUERY_MTU 40/* as kernel for current MTU */ +# define BIO_CTRL_DGRAM_GET_FALLBACK_MTU 47 +# define BIO_CTRL_DGRAM_GET_MTU 41/* get cached value for MTU */ +# define BIO_CTRL_DGRAM_SET_MTU 42/* set cached value for MTU. 
+ * want to use this if asking + * the kernel fails */ + +# define BIO_CTRL_DGRAM_MTU_EXCEEDED 43/* check whether the MTU was + * exceed in the previous write + * operation */ + +# define BIO_CTRL_DGRAM_GET_PEER 46 +# define BIO_CTRL_DGRAM_SET_PEER 44/* Destination for the data */ + +# define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT 45/* Next DTLS handshake timeout + * to adjust socket timeouts */ +# define BIO_CTRL_DGRAM_SET_DONT_FRAG 48 + +# define BIO_CTRL_DGRAM_GET_MTU_OVERHEAD 49 + +/* Deliberately outside of OPENSSL_NO_SCTP - used in bss_dgram.c */ +# define BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE 50 +# ifndef OPENSSL_NO_SCTP +/* SCTP stuff */ +# define BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY 51 +# define BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY 52 +# define BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD 53 +# define BIO_CTRL_DGRAM_SCTP_GET_SNDINFO 60 +# define BIO_CTRL_DGRAM_SCTP_SET_SNDINFO 61 +# define BIO_CTRL_DGRAM_SCTP_GET_RCVINFO 62 +# define BIO_CTRL_DGRAM_SCTP_SET_RCVINFO 63 +# define BIO_CTRL_DGRAM_SCTP_GET_PRINFO 64 +# define BIO_CTRL_DGRAM_SCTP_SET_PRINFO 65 +# define BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN 70 +# endif + +# define BIO_CTRL_DGRAM_SET_PEEK_MODE 71 + +/* + * internal BIO: + * # define BIO_CTRL_SET_KTLS_SEND 72 + * # define BIO_CTRL_SET_KTLS_SEND_CTRL_MSG 74 + * # define BIO_CTRL_CLEAR_KTLS_CTRL_MSG 75 + */ + +# define BIO_CTRL_GET_KTLS_SEND 73 +# define BIO_CTRL_GET_KTLS_RECV 76 + +# define BIO_CTRL_DGRAM_SCTP_WAIT_FOR_DRY 77 +# define BIO_CTRL_DGRAM_SCTP_MSG_WAITING 78 + +/* BIO_f_prefix controls */ +# define BIO_CTRL_SET_PREFIX 79 +# define BIO_CTRL_SET_INDENT 80 +# define BIO_CTRL_GET_INDENT 81 + +# define BIO_CTRL_DGRAM_GET_LOCAL_ADDR_CAP 82 +# define BIO_CTRL_DGRAM_GET_LOCAL_ADDR_ENABLE 83 +# define BIO_CTRL_DGRAM_SET_LOCAL_ADDR_ENABLE 84 +# define BIO_CTRL_DGRAM_GET_EFFECTIVE_CAPS 85 +# define BIO_CTRL_DGRAM_GET_CAPS 86 +# define BIO_CTRL_DGRAM_SET_CAPS 87 +# define BIO_CTRL_DGRAM_GET_NO_TRUNC 88 +# define BIO_CTRL_DGRAM_SET_NO_TRUNC 89 + +/* + * internal BIO: + * # define BIO_CTRL_SET_KTLS_TX_ZEROCOPY_SENDFILE 90 + */ + +# define BIO_CTRL_GET_RPOLL_DESCRIPTOR 91 +# define BIO_CTRL_GET_WPOLL_DESCRIPTOR 92 +# define BIO_CTRL_DGRAM_DETECT_PEER_ADDR 93 + +# define BIO_DGRAM_CAP_NONE 0U +# define BIO_DGRAM_CAP_HANDLES_SRC_ADDR (1U << 0) +# define BIO_DGRAM_CAP_HANDLES_DST_ADDR (1U << 1) +# define BIO_DGRAM_CAP_PROVIDES_SRC_ADDR (1U << 2) +# define BIO_DGRAM_CAP_PROVIDES_DST_ADDR (1U << 3) + +# ifndef OPENSSL_NO_KTLS +# define BIO_get_ktls_send(b) \ + (BIO_ctrl(b, BIO_CTRL_GET_KTLS_SEND, 0, NULL) > 0) +# define BIO_get_ktls_recv(b) \ + (BIO_ctrl(b, BIO_CTRL_GET_KTLS_RECV, 0, NULL) > 0) +# else +# define BIO_get_ktls_send(b) (0) +# define BIO_get_ktls_recv(b) (0) +# endif + +/* modifiers */ +# define BIO_FP_READ 0x02 +# define BIO_FP_WRITE 0x04 +# define BIO_FP_APPEND 0x08 +# define BIO_FP_TEXT 0x10 + +# define BIO_FLAGS_READ 0x01 +# define BIO_FLAGS_WRITE 0x02 +# define BIO_FLAGS_IO_SPECIAL 0x04 +# define BIO_FLAGS_RWS (BIO_FLAGS_READ|BIO_FLAGS_WRITE|BIO_FLAGS_IO_SPECIAL) +# define BIO_FLAGS_SHOULD_RETRY 0x08 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* This #define was replaced by an internal constant and should not be used. */ +# define BIO_FLAGS_UPLINK 0 +# endif + +# define BIO_FLAGS_BASE64_NO_NL 0x100 + +/* + * This is used with memory BIOs: + * BIO_FLAGS_MEM_RDONLY means we shouldn't free up or change the data in any way; + * BIO_FLAGS_NONCLEAR_RST means we shouldn't clear data on reset. 
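[Editorial aside, not part of the vendored header] A small usage sketch of the memory-BIO behaviour just described: BIO_new_mem_buf() (declared further down in bio.h) creates a BIO marked BIO_FLAGS_MEM_RDONLY, so the caller's buffer is neither modified nor freed by the BIO. Illustration only:

/* Illustrative sketch only: a read-only memory BIO over a caller-owned buffer. */
#include <openssl/bio.h>

static int read_from_static_buffer(void)
{
    static const char data[] = "example payload";
    char buf[32];
    int n;

    BIO *in = BIO_new_mem_buf(data, sizeof(data) - 1);  /* read-only memory BIO */
    if (in == NULL)
        return 0;

    n = BIO_read(in, buf, sizeof(buf));                 /* reads from 'data' */
    BIO_free(in);                                       /* does not free 'data' */
    return n > 0;
}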
+ */ +# define BIO_FLAGS_MEM_RDONLY 0x200 +# define BIO_FLAGS_NONCLEAR_RST 0x400 +# define BIO_FLAGS_IN_EOF 0x800 + +/* the BIO FLAGS values 0x1000 to 0x8000 are reserved for internal KTLS flags */ + +typedef union bio_addr_st BIO_ADDR; +typedef struct bio_addrinfo_st BIO_ADDRINFO; + +int BIO_get_new_index(void); +void BIO_set_flags(BIO *b, int flags); +int BIO_test_flags(const BIO *b, int flags); +void BIO_clear_flags(BIO *b, int flags); + +# define BIO_get_flags(b) BIO_test_flags(b, ~(0x0)) +# define BIO_set_retry_special(b) \ + BIO_set_flags(b, (BIO_FLAGS_IO_SPECIAL|BIO_FLAGS_SHOULD_RETRY)) +# define BIO_set_retry_read(b) \ + BIO_set_flags(b, (BIO_FLAGS_READ|BIO_FLAGS_SHOULD_RETRY)) +# define BIO_set_retry_write(b) \ + BIO_set_flags(b, (BIO_FLAGS_WRITE|BIO_FLAGS_SHOULD_RETRY)) + +/* These are normally used internally in BIOs */ +# define BIO_clear_retry_flags(b) \ + BIO_clear_flags(b, (BIO_FLAGS_RWS|BIO_FLAGS_SHOULD_RETRY)) +# define BIO_get_retry_flags(b) \ + BIO_test_flags(b, (BIO_FLAGS_RWS|BIO_FLAGS_SHOULD_RETRY)) + +/* These should be used by the application to tell why we should retry */ +# define BIO_should_read(a) BIO_test_flags(a, BIO_FLAGS_READ) +# define BIO_should_write(a) BIO_test_flags(a, BIO_FLAGS_WRITE) +# define BIO_should_io_special(a) BIO_test_flags(a, BIO_FLAGS_IO_SPECIAL) +# define BIO_retry_type(a) BIO_test_flags(a, BIO_FLAGS_RWS) +# define BIO_should_retry(a) BIO_test_flags(a, BIO_FLAGS_SHOULD_RETRY) + +/* + * The next three are used in conjunction with the BIO_should_io_special() + * condition. After this returns true, BIO *BIO_get_retry_BIO(BIO *bio, int + * *reason); will walk the BIO stack and return the 'reason' for the special + * and the offending BIO. Given a BIO, BIO_get_retry_reason(bio) will return + * the code. + */ +/* + * Returned from the SSL bio when the certificate retrieval code had an error + */ +# define BIO_RR_SSL_X509_LOOKUP 0x01 +/* Returned from the connect BIO when a connect would have blocked */ +# define BIO_RR_CONNECT 0x02 +/* Returned from the accept BIO when an accept would have blocked */ +# define BIO_RR_ACCEPT 0x03 + +/* These are passed by the BIO callback */ +# define BIO_CB_FREE 0x01 +# define BIO_CB_READ 0x02 +# define BIO_CB_WRITE 0x03 +# define BIO_CB_PUTS 0x04 +# define BIO_CB_GETS 0x05 +# define BIO_CB_CTRL 0x06 +# define BIO_CB_RECVMMSG 0x07 +# define BIO_CB_SENDMMSG 0x08 + +/* + * The callback is called before and after the underling operation, The + * BIO_CB_RETURN flag indicates if it is after the call + */ +# define BIO_CB_RETURN 0x80 +# define BIO_CB_return(a) ((a)|BIO_CB_RETURN) +# define BIO_cb_pre(a) (!((a)&BIO_CB_RETURN)) +# define BIO_cb_post(a) ((a)&BIO_CB_RETURN) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +typedef long (*BIO_callback_fn)(BIO *b, int oper, const char *argp, int argi, + long argl, long ret); +OSSL_DEPRECATEDIN_3_0 BIO_callback_fn BIO_get_callback(const BIO *b); +OSSL_DEPRECATEDIN_3_0 void BIO_set_callback(BIO *b, BIO_callback_fn callback); +OSSL_DEPRECATEDIN_3_0 long BIO_debug_callback(BIO *bio, int cmd, + const char *argp, int argi, + long argl, long ret); +# endif + +typedef long (*BIO_callback_fn_ex)(BIO *b, int oper, const char *argp, + size_t len, int argi, + long argl, int ret, size_t *processed); +BIO_callback_fn_ex BIO_get_callback_ex(const BIO *b); +void BIO_set_callback_ex(BIO *b, BIO_callback_fn_ex callback); +long BIO_debug_callback_ex(BIO *bio, int oper, const char *argp, size_t len, + int argi, long argl, int ret, size_t *processed); + +char *BIO_get_callback_arg(const BIO *b); +void 
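/*
 * Editor's illustrative sketch (not part of the upstream header or this diff):
 * the retry macros above (BIO_should_retry(), BIO_set_retry_write(), ...) are
 * how an application tells a transient failure on a non-blocking BIO apart
 * from a hard error. The helper name and the "just retry" policy are
 * illustrative assumptions; only the BIO_* calls come from this header.
 */
static int example_write_all(BIO *b, const void *data, size_t len)
{
    size_t done = 0, n = 0;

    while (done < len) {
        if (!BIO_write_ex(b, (const char *)data + done, len - done, &n)) {
            if (BIO_should_retry(b))
                continue;   /* transient condition: retry (or poll first) */
            return 0;       /* hard error */
        }
        done += n;
    }
    return 1;
}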
BIO_set_callback_arg(BIO *b, char *arg); + +typedef struct bio_method_st BIO_METHOD; + +const char *BIO_method_name(const BIO *b); +int BIO_method_type(const BIO *b); + +typedef int BIO_info_cb(BIO *, int, int); +typedef BIO_info_cb bio_info_cb; /* backward compatibility */ + +SKM_DEFINE_STACK_OF_INTERNAL(BIO, BIO, BIO) +#define sk_BIO_num(sk) OPENSSL_sk_num(ossl_check_const_BIO_sk_type(sk)) +#define sk_BIO_value(sk, idx) ((BIO *)OPENSSL_sk_value(ossl_check_const_BIO_sk_type(sk), (idx))) +#define sk_BIO_new(cmp) ((STACK_OF(BIO) *)OPENSSL_sk_new(ossl_check_BIO_compfunc_type(cmp))) +#define sk_BIO_new_null() ((STACK_OF(BIO) *)OPENSSL_sk_new_null()) +#define sk_BIO_new_reserve(cmp, n) ((STACK_OF(BIO) *)OPENSSL_sk_new_reserve(ossl_check_BIO_compfunc_type(cmp), (n))) +#define sk_BIO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_BIO_sk_type(sk), (n)) +#define sk_BIO_free(sk) OPENSSL_sk_free(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_zero(sk) OPENSSL_sk_zero(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_delete(sk, i) ((BIO *)OPENSSL_sk_delete(ossl_check_BIO_sk_type(sk), (i))) +#define sk_BIO_delete_ptr(sk, ptr) ((BIO *)OPENSSL_sk_delete_ptr(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr))) +#define sk_BIO_push(sk, ptr) OPENSSL_sk_push(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_pop(sk) ((BIO *)OPENSSL_sk_pop(ossl_check_BIO_sk_type(sk))) +#define sk_BIO_shift(sk) ((BIO *)OPENSSL_sk_shift(ossl_check_BIO_sk_type(sk))) +#define sk_BIO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_BIO_sk_type(sk),ossl_check_BIO_freefunc_type(freefunc)) +#define sk_BIO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr), (idx)) +#define sk_BIO_set(sk, idx, ptr) ((BIO *)OPENSSL_sk_set(ossl_check_BIO_sk_type(sk), (idx), ossl_check_BIO_type(ptr))) +#define sk_BIO_find(sk, ptr) OPENSSL_sk_find(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr), pnum) +#define sk_BIO_sort(sk) OPENSSL_sk_sort(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_BIO_sk_type(sk)) +#define sk_BIO_dup(sk) ((STACK_OF(BIO) *)OPENSSL_sk_dup(ossl_check_const_BIO_sk_type(sk))) +#define sk_BIO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(BIO) *)OPENSSL_sk_deep_copy(ossl_check_const_BIO_sk_type(sk), ossl_check_BIO_copyfunc_type(copyfunc), ossl_check_BIO_freefunc_type(freefunc))) +#define sk_BIO_set_cmp_func(sk, cmp) ((sk_BIO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_BIO_sk_type(sk), ossl_check_BIO_compfunc_type(cmp))) + + + +/* Prefix and suffix callback in ASN1 BIO */ +typedef int asn1_ps_func (BIO *b, unsigned char **pbuf, int *plen, + void *parg); + +typedef void (*BIO_dgram_sctp_notification_handler_fn) (BIO *b, + void *context, + void *buf); +# ifndef OPENSSL_NO_SCTP +/* SCTP parameter structs */ +struct bio_dgram_sctp_sndinfo { + uint16_t snd_sid; + uint16_t snd_flags; + uint32_t snd_ppid; + uint32_t snd_context; +}; + +struct bio_dgram_sctp_rcvinfo { + uint16_t rcv_sid; + uint16_t rcv_ssn; + uint16_t rcv_flags; + uint32_t rcv_ppid; + uint32_t rcv_tsn; + uint32_t rcv_cumtsn; + uint32_t rcv_context; +}; + +struct bio_dgram_sctp_prinfo { + uint16_t pr_policy; + uint32_t pr_value; +}; +# endif + +/* 
BIO_sendmmsg/BIO_recvmmsg-related definitions */ +typedef struct bio_msg_st { + void *data; + size_t data_len; + BIO_ADDR *peer, *local; + uint64_t flags; +} BIO_MSG; + +typedef struct bio_mmsg_cb_args_st { + BIO_MSG *msg; + size_t stride, num_msg; + uint64_t flags; + size_t *msgs_processed; +} BIO_MMSG_CB_ARGS; + +#define BIO_POLL_DESCRIPTOR_TYPE_NONE 0 +#define BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD 1 +#define BIO_POLL_DESCRIPTOR_CUSTOM_START 8192 + +typedef struct bio_poll_descriptor_st { + uint32_t type; + union { + int fd; + void *custom; + uintptr_t custom_ui; + } value; +} BIO_POLL_DESCRIPTOR; + +/* + * #define BIO_CONN_get_param_hostname BIO_ctrl + */ + +# define BIO_C_SET_CONNECT 100 +# define BIO_C_DO_STATE_MACHINE 101 +# define BIO_C_SET_NBIO 102 +/* # define BIO_C_SET_PROXY_PARAM 103 */ +# define BIO_C_SET_FD 104 +# define BIO_C_GET_FD 105 +# define BIO_C_SET_FILE_PTR 106 +# define BIO_C_GET_FILE_PTR 107 +# define BIO_C_SET_FILENAME 108 +# define BIO_C_SET_SSL 109 +# define BIO_C_GET_SSL 110 +# define BIO_C_SET_MD 111 +# define BIO_C_GET_MD 112 +# define BIO_C_GET_CIPHER_STATUS 113 +# define BIO_C_SET_BUF_MEM 114 +# define BIO_C_GET_BUF_MEM_PTR 115 +# define BIO_C_GET_BUFF_NUM_LINES 116 +# define BIO_C_SET_BUFF_SIZE 117 +# define BIO_C_SET_ACCEPT 118 +# define BIO_C_SSL_MODE 119 +# define BIO_C_GET_MD_CTX 120 +/* # define BIO_C_GET_PROXY_PARAM 121 */ +# define BIO_C_SET_BUFF_READ_DATA 122/* data to read first */ +# define BIO_C_GET_CONNECT 123 +# define BIO_C_GET_ACCEPT 124 +# define BIO_C_SET_SSL_RENEGOTIATE_BYTES 125 +# define BIO_C_GET_SSL_NUM_RENEGOTIATES 126 +# define BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT 127 +# define BIO_C_FILE_SEEK 128 +# define BIO_C_GET_CIPHER_CTX 129 +# define BIO_C_SET_BUF_MEM_EOF_RETURN 130/* return end of input + * value */ +# define BIO_C_SET_BIND_MODE 131 +# define BIO_C_GET_BIND_MODE 132 +# define BIO_C_FILE_TELL 133 +# define BIO_C_GET_SOCKS 134 +# define BIO_C_SET_SOCKS 135 + +# define BIO_C_SET_WRITE_BUF_SIZE 136/* for BIO_s_bio */ +# define BIO_C_GET_WRITE_BUF_SIZE 137 +# define BIO_C_MAKE_BIO_PAIR 138 +# define BIO_C_DESTROY_BIO_PAIR 139 +# define BIO_C_GET_WRITE_GUARANTEE 140 +# define BIO_C_GET_READ_REQUEST 141 +# define BIO_C_SHUTDOWN_WR 142 +# define BIO_C_NREAD0 143 +# define BIO_C_NREAD 144 +# define BIO_C_NWRITE0 145 +# define BIO_C_NWRITE 146 +# define BIO_C_RESET_READ_REQUEST 147 +# define BIO_C_SET_MD_CTX 148 + +# define BIO_C_SET_PREFIX 149 +# define BIO_C_GET_PREFIX 150 +# define BIO_C_SET_SUFFIX 151 +# define BIO_C_GET_SUFFIX 152 + +# define BIO_C_SET_EX_ARG 153 +# define BIO_C_GET_EX_ARG 154 + +# define BIO_C_SET_CONNECT_MODE 155 + +# define BIO_C_SET_TFO 156 /* like BIO_C_SET_NBIO */ + +# define BIO_C_SET_SOCK_TYPE 157 +# define BIO_C_GET_SOCK_TYPE 158 +# define BIO_C_GET_DGRAM_BIO 159 + +# define BIO_set_app_data(s,arg) BIO_set_ex_data(s,0,arg) +# define BIO_get_app_data(s) BIO_get_ex_data(s,0) + +# define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) +# define BIO_set_tfo(b,n) BIO_ctrl(b,BIO_C_SET_TFO,(n),NULL) + +# ifndef OPENSSL_NO_SOCK +/* IP families we support, for BIO_s_connect() and BIO_s_accept() */ +/* Note: the underlying operating system may not support some of them */ +# define BIO_FAMILY_IPV4 4 +# define BIO_FAMILY_IPV6 6 +# define BIO_FAMILY_IPANY 256 + +/* BIO_s_connect() */ +# define BIO_set_conn_hostname(b,name) BIO_ctrl(b,BIO_C_SET_CONNECT,0, \ + (char *)(name)) +# define BIO_set_conn_port(b,port) BIO_ctrl(b,BIO_C_SET_CONNECT,1, \ + (char *)(port)) +# define BIO_set_conn_address(b,addr) 
BIO_ctrl(b,BIO_C_SET_CONNECT,2, \ + (char *)(addr)) +# define BIO_set_conn_ip_family(b,f) BIO_int_ctrl(b,BIO_C_SET_CONNECT,3,f) +# define BIO_get_conn_hostname(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,0)) +# define BIO_get_conn_port(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,1)) +# define BIO_get_conn_address(b) ((const BIO_ADDR *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,2)) +# define BIO_get_conn_ip_family(b) BIO_ctrl(b,BIO_C_GET_CONNECT,3,NULL) +# define BIO_get_conn_mode(b) BIO_ctrl(b,BIO_C_GET_CONNECT,4,NULL) +# define BIO_set_conn_mode(b,n) BIO_ctrl(b,BIO_C_SET_CONNECT_MODE,(n),NULL) +# define BIO_set_sock_type(b,t) BIO_ctrl(b,BIO_C_SET_SOCK_TYPE,(t),NULL) +# define BIO_get_sock_type(b) BIO_ctrl(b,BIO_C_GET_SOCK_TYPE,0,NULL) +# define BIO_get0_dgram_bio(b, p) BIO_ctrl(b,BIO_C_GET_DGRAM_BIO,0,(void *)(BIO **)(p)) + +/* BIO_s_accept() */ +# define BIO_set_accept_name(b,name) BIO_ctrl(b,BIO_C_SET_ACCEPT,0, \ + (char *)(name)) +# define BIO_set_accept_port(b,port) BIO_ctrl(b,BIO_C_SET_ACCEPT,1, \ + (char *)(port)) +# define BIO_get_accept_name(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,0)) +# define BIO_get_accept_port(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,1)) +# define BIO_get_peer_name(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,2)) +# define BIO_get_peer_port(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,3)) +/* #define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) */ +# define BIO_set_nbio_accept(b,n) BIO_ctrl(b,BIO_C_SET_ACCEPT,2,(n)?(void *)"a":NULL) +# define BIO_set_accept_bios(b,bio) BIO_ctrl(b,BIO_C_SET_ACCEPT,3, \ + (char *)(bio)) +# define BIO_set_accept_ip_family(b,f) BIO_int_ctrl(b,BIO_C_SET_ACCEPT,4,f) +# define BIO_get_accept_ip_family(b) BIO_ctrl(b,BIO_C_GET_ACCEPT,4,NULL) +# define BIO_set_tfo_accept(b,n) BIO_ctrl(b,BIO_C_SET_ACCEPT,5,(n)?(void *)"a":NULL) + +/* Aliases kept for backward compatibility */ +# define BIO_BIND_NORMAL 0 +# define BIO_BIND_REUSEADDR BIO_SOCK_REUSEADDR +# define BIO_BIND_REUSEADDR_IF_UNUSED BIO_SOCK_REUSEADDR +# define BIO_set_bind_mode(b,mode) BIO_ctrl(b,BIO_C_SET_BIND_MODE,mode,NULL) +# define BIO_get_bind_mode(b) BIO_ctrl(b,BIO_C_GET_BIND_MODE,0,NULL) +# endif /* OPENSSL_NO_SOCK */ + +# define BIO_do_connect(b) BIO_do_handshake(b) +# define BIO_do_accept(b) BIO_do_handshake(b) + +# define BIO_do_handshake(b) BIO_ctrl(b,BIO_C_DO_STATE_MACHINE,0,NULL) + +/* BIO_s_datagram(), BIO_s_fd(), BIO_s_socket(), BIO_s_accept() and BIO_s_connect() */ +# define BIO_set_fd(b,fd,c) BIO_int_ctrl(b,BIO_C_SET_FD,c,fd) +# define BIO_get_fd(b,c) BIO_ctrl(b,BIO_C_GET_FD,0,(char *)(c)) + +/* BIO_s_file() */ +# define BIO_set_fp(b,fp,c) BIO_ctrl(b,BIO_C_SET_FILE_PTR,c,(char *)(fp)) +# define BIO_get_fp(b,fpp) BIO_ctrl(b,BIO_C_GET_FILE_PTR,0,(char *)(fpp)) + +/* BIO_s_fd() and BIO_s_file() */ +# define BIO_seek(b,ofs) (int)BIO_ctrl(b,BIO_C_FILE_SEEK,ofs,NULL) +# define BIO_tell(b) (int)BIO_ctrl(b,BIO_C_FILE_TELL,0,NULL) + +/* + * name is cast to lose const, but might be better to route through a + * function so we can do it safely + */ +# ifdef CONST_STRICT +/* + * If you are wondering why this isn't defined, its because CONST_STRICT is + * purely a compile-time kludge to allow const to be checked. 
+ */ +int BIO_read_filename(BIO *b, const char *name); +# else +# define BIO_read_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_READ,(char *)(name)) +# endif +# define BIO_write_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_WRITE,name) +# define BIO_append_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_APPEND,name) +# define BIO_rw_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_READ|BIO_FP_WRITE,name) + +/* + * WARNING WARNING, this ups the reference count on the read bio of the SSL + * structure. This is because the ssl read BIO is now pointed to by the + * next_bio field in the bio. So when you free the BIO, make sure you are + * doing a BIO_free_all() to catch the underlying BIO. + */ +# define BIO_set_ssl(b,ssl,c) BIO_ctrl(b,BIO_C_SET_SSL,c,(char *)(ssl)) +# define BIO_get_ssl(b,sslp) BIO_ctrl(b,BIO_C_GET_SSL,0,(char *)(sslp)) +# define BIO_set_ssl_mode(b,client) BIO_ctrl(b,BIO_C_SSL_MODE,client,NULL) +# define BIO_set_ssl_renegotiate_bytes(b,num) \ + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_BYTES,num,NULL) +# define BIO_get_num_renegotiates(b) \ + BIO_ctrl(b,BIO_C_GET_SSL_NUM_RENEGOTIATES,0,NULL) +# define BIO_set_ssl_renegotiate_timeout(b,seconds) \ + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT,seconds,NULL) + +/* defined in evp.h */ +/* #define BIO_set_md(b,md) BIO_ctrl(b,BIO_C_SET_MD,1,(char *)(md)) */ + +# define BIO_get_mem_data(b,pp) BIO_ctrl(b,BIO_CTRL_INFO,0,(char *)(pp)) +# define BIO_set_mem_buf(b,bm,c) BIO_ctrl(b,BIO_C_SET_BUF_MEM,c,(char *)(bm)) +# define BIO_get_mem_ptr(b,pp) BIO_ctrl(b,BIO_C_GET_BUF_MEM_PTR,0, \ + (char *)(pp)) +# define BIO_set_mem_eof_return(b,v) \ + BIO_ctrl(b,BIO_C_SET_BUF_MEM_EOF_RETURN,v,NULL) + +/* For the BIO_f_buffer() type */ +# define BIO_get_buffer_num_lines(b) BIO_ctrl(b,BIO_C_GET_BUFF_NUM_LINES,0,NULL) +# define BIO_set_buffer_size(b,size) BIO_ctrl(b,BIO_C_SET_BUFF_SIZE,size,NULL) +# define BIO_set_read_buffer_size(b,size) BIO_int_ctrl(b,BIO_C_SET_BUFF_SIZE,size,0) +# define BIO_set_write_buffer_size(b,size) BIO_int_ctrl(b,BIO_C_SET_BUFF_SIZE,size,1) +# define BIO_set_buffer_read_data(b,buf,num) BIO_ctrl(b,BIO_C_SET_BUFF_READ_DATA,num,buf) + +/* Don't use the next one unless you know what you are doing :-) */ +# define BIO_dup_state(b,ret) BIO_ctrl(b,BIO_CTRL_DUP,0,(char *)(ret)) + +# define BIO_reset(b) (int)BIO_ctrl(b,BIO_CTRL_RESET,0,NULL) +# define BIO_eof(b) (int)BIO_ctrl(b,BIO_CTRL_EOF,0,NULL) +# define BIO_set_close(b,c) (int)BIO_ctrl(b,BIO_CTRL_SET_CLOSE,(c),NULL) +# define BIO_get_close(b) (int)BIO_ctrl(b,BIO_CTRL_GET_CLOSE,0,NULL) +# define BIO_pending(b) (int)BIO_ctrl(b,BIO_CTRL_PENDING,0,NULL) +# define BIO_wpending(b) (int)BIO_ctrl(b,BIO_CTRL_WPENDING,0,NULL) +/* ...pending macros have inappropriate return type */ +size_t BIO_ctrl_pending(BIO *b); +size_t BIO_ctrl_wpending(BIO *b); +# define BIO_flush(b) (int)BIO_ctrl(b,BIO_CTRL_FLUSH,0,NULL) +# define BIO_get_info_callback(b,cbp) (int)BIO_ctrl(b,BIO_CTRL_GET_CALLBACK,0, \ + cbp) +# define BIO_set_info_callback(b,cb) (int)BIO_callback_ctrl(b,BIO_CTRL_SET_CALLBACK,cb) + +/* For the BIO_f_buffer() type */ +# define BIO_buffer_get_num_lines(b) BIO_ctrl(b,BIO_CTRL_GET,0,NULL) +# define BIO_buffer_peek(b,s,l) BIO_ctrl(b,BIO_CTRL_PEEK,(l),(s)) + +/* For BIO_s_bio() */ +# define BIO_set_write_buf_size(b,size) (int)BIO_ctrl(b,BIO_C_SET_WRITE_BUF_SIZE,size,NULL) +# define BIO_get_write_buf_size(b,size) (size_t)BIO_ctrl(b,BIO_C_GET_WRITE_BUF_SIZE,size,NULL) +# define 
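/*
 * Editor's illustrative sketch (not part of the upstream header or this diff):
 * the usual pattern behind the BIO_get_mem_data() macro above - accumulate
 * output in a memory BIO, then borrow a pointer to the buffered bytes.
 * Error handling is omitted and the function name is illustrative only.
 */
static long example_mem_bio(void)
{
    BIO *mem = BIO_new(BIO_s_mem());
    char *ptr = NULL;
    long len;

    BIO_puts(mem, "hello");
    len = BIO_get_mem_data(mem, &ptr);  /* ptr -> "hello", len == 5 */
    /* ... use ptr[0..len) while 'mem' is still alive ... */
    BIO_free(mem);
    return len;
}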
BIO_make_bio_pair(b1,b2) (int)BIO_ctrl(b1,BIO_C_MAKE_BIO_PAIR,0,b2) +# define BIO_destroy_bio_pair(b) (int)BIO_ctrl(b,BIO_C_DESTROY_BIO_PAIR,0,NULL) +# define BIO_shutdown_wr(b) (int)BIO_ctrl(b, BIO_C_SHUTDOWN_WR, 0, NULL) +/* macros with inappropriate type -- but ...pending macros use int too: */ +# define BIO_get_write_guarantee(b) (int)BIO_ctrl(b,BIO_C_GET_WRITE_GUARANTEE,0,NULL) +# define BIO_get_read_request(b) (int)BIO_ctrl(b,BIO_C_GET_READ_REQUEST,0,NULL) +size_t BIO_ctrl_get_write_guarantee(BIO *b); +size_t BIO_ctrl_get_read_request(BIO *b); +int BIO_ctrl_reset_read_request(BIO *b); + +/* ctrl macros for dgram */ +# define BIO_ctrl_dgram_connect(b,peer) \ + (int)BIO_ctrl(b,BIO_CTRL_DGRAM_CONNECT,0, (char *)(peer)) +# define BIO_ctrl_set_connected(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_CONNECTED, 0, (char *)(peer)) +# define BIO_dgram_recv_timedout(b) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_RECV_TIMER_EXP, 0, NULL) +# define BIO_dgram_send_timedout(b) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP, 0, NULL) +# define BIO_dgram_get_peer(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_PEER, 0, (char *)(peer)) +# define BIO_dgram_set_peer(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_PEER, 0, (char *)(peer)) +# define BIO_dgram_detect_peer_addr(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_DETECT_PEER_ADDR, 0, (char *)(peer)) +# define BIO_dgram_get_mtu_overhead(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU_OVERHEAD, 0, NULL) +# define BIO_dgram_get_local_addr_cap(b) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_LOCAL_ADDR_CAP, 0, NULL) +# define BIO_dgram_get_local_addr_enable(b, penable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_LOCAL_ADDR_ENABLE, 0, (char *)(penable)) +# define BIO_dgram_set_local_addr_enable(b, enable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_LOCAL_ADDR_ENABLE, (enable), NULL) +# define BIO_dgram_get_effective_caps(b) \ + (uint32_t)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_EFFECTIVE_CAPS, 0, NULL) +# define BIO_dgram_get_caps(b) \ + (uint32_t)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_CAPS, 0, NULL) +# define BIO_dgram_set_caps(b, caps) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_CAPS, (long)(caps), NULL) +# define BIO_dgram_get_no_trunc(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_NO_TRUNC, 0, NULL) +# define BIO_dgram_set_no_trunc(b, enable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_NO_TRUNC, (enable), NULL) +# define BIO_dgram_get_mtu(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU, 0, NULL) +# define BIO_dgram_set_mtu(b, mtu) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_MTU, (mtu), NULL) + +/* ctrl macros for BIO_f_prefix */ +# define BIO_set_prefix(b,p) BIO_ctrl((b), BIO_CTRL_SET_PREFIX, 0, (void *)(p)) +# define BIO_set_indent(b,i) BIO_ctrl((b), BIO_CTRL_SET_INDENT, (i), NULL) +# define BIO_get_indent(b) BIO_ctrl((b), BIO_CTRL_GET_INDENT, 0, NULL) + +#define BIO_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_BIO, l, p, newf, dupf, freef) +int BIO_set_ex_data(BIO *bio, int idx, void *data); +void *BIO_get_ex_data(const BIO *bio, int idx); +uint64_t BIO_number_read(BIO *bio); +uint64_t BIO_number_written(BIO *bio); + +/* For BIO_f_asn1() */ +int BIO_asn1_set_prefix(BIO *b, asn1_ps_func *prefix, + asn1_ps_func *prefix_free); +int BIO_asn1_get_prefix(BIO *b, asn1_ps_func **pprefix, + asn1_ps_func **pprefix_free); +int BIO_asn1_set_suffix(BIO *b, asn1_ps_func *suffix, + asn1_ps_func *suffix_free); +int BIO_asn1_get_suffix(BIO *b, asn1_ps_func **psuffix, + asn1_ps_func **psuffix_free); + +const BIO_METHOD 
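/*
 * Editor's illustrative sketch (not part of the upstream header or this diff):
 * using the BIO_set_prefix()/BIO_set_indent() controls defined above with the
 * BIO_f_prefix() filter to decorate everything written through a chain.
 * The prefix text, indent value and function name are illustrative only.
 */
static void example_prefixed_output(BIO *out)
{
    BIO *pfx = BIO_new(BIO_f_prefix());

    out = BIO_push(pfx, out);          /* pfx now writes through to 'out' */
    BIO_set_prefix(pfx, "LOG: ");
    BIO_set_indent(pfx, 4);
    BIO_puts(out, "prefixed, indented line\n");
    (void)BIO_pop(pfx);                /* detach the filter before freeing it */
    BIO_free(pfx);
}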
*BIO_s_file(void); +BIO *BIO_new_file(const char *filename, const char *mode); +BIO *BIO_new_from_core_bio(OSSL_LIB_CTX *libctx, OSSL_CORE_BIO *corebio); +# ifndef OPENSSL_NO_STDIO +BIO *BIO_new_fp(FILE *stream, int close_flag); +# endif +BIO *BIO_new_ex(OSSL_LIB_CTX *libctx, const BIO_METHOD *method); +BIO *BIO_new(const BIO_METHOD *type); +int BIO_free(BIO *a); +void BIO_set_data(BIO *a, void *ptr); +void *BIO_get_data(BIO *a); +void BIO_set_init(BIO *a, int init); +int BIO_get_init(BIO *a); +void BIO_set_shutdown(BIO *a, int shut); +int BIO_get_shutdown(BIO *a); +void BIO_vfree(BIO *a); +int BIO_up_ref(BIO *a); +int BIO_read(BIO *b, void *data, int dlen); +int BIO_read_ex(BIO *b, void *data, size_t dlen, size_t *readbytes); +__owur int BIO_recvmmsg(BIO *b, BIO_MSG *msg, + size_t stride, size_t num_msg, uint64_t flags, + size_t *msgs_processed); +int BIO_gets(BIO *bp, char *buf, int size); +int BIO_get_line(BIO *bio, char *buf, int size); +int BIO_write(BIO *b, const void *data, int dlen); +int BIO_write_ex(BIO *b, const void *data, size_t dlen, size_t *written); +__owur int BIO_sendmmsg(BIO *b, BIO_MSG *msg, + size_t stride, size_t num_msg, uint64_t flags, + size_t *msgs_processed); +__owur int BIO_get_rpoll_descriptor(BIO *b, BIO_POLL_DESCRIPTOR *desc); +__owur int BIO_get_wpoll_descriptor(BIO *b, BIO_POLL_DESCRIPTOR *desc); +int BIO_puts(BIO *bp, const char *buf); +int BIO_indent(BIO *b, int indent, int max); +long BIO_ctrl(BIO *bp, int cmd, long larg, void *parg); +long BIO_callback_ctrl(BIO *b, int cmd, BIO_info_cb *fp); +void *BIO_ptr_ctrl(BIO *bp, int cmd, long larg); +long BIO_int_ctrl(BIO *bp, int cmd, long larg, int iarg); +BIO *BIO_push(BIO *b, BIO *append); +BIO *BIO_pop(BIO *b); +void BIO_free_all(BIO *a); +BIO *BIO_find_type(BIO *b, int bio_type); +BIO *BIO_next(BIO *b); +void BIO_set_next(BIO *b, BIO *next); +BIO *BIO_get_retry_BIO(BIO *bio, int *reason); +int BIO_get_retry_reason(BIO *bio); +void BIO_set_retry_reason(BIO *bio, int reason); +BIO *BIO_dup_chain(BIO *in); + +int BIO_nread0(BIO *bio, char **buf); +int BIO_nread(BIO *bio, char **buf, int num); +int BIO_nwrite0(BIO *bio, char **buf); +int BIO_nwrite(BIO *bio, char **buf, int num); + +const BIO_METHOD *BIO_s_mem(void); +# ifndef OPENSSL_NO_DGRAM +const BIO_METHOD *BIO_s_dgram_mem(void); +# endif +const BIO_METHOD *BIO_s_secmem(void); +BIO *BIO_new_mem_buf(const void *buf, int len); +# ifndef OPENSSL_NO_SOCK +const BIO_METHOD *BIO_s_socket(void); +const BIO_METHOD *BIO_s_connect(void); +const BIO_METHOD *BIO_s_accept(void); +# endif +const BIO_METHOD *BIO_s_fd(void); +const BIO_METHOD *BIO_s_log(void); +const BIO_METHOD *BIO_s_bio(void); +const BIO_METHOD *BIO_s_null(void); +const BIO_METHOD *BIO_f_null(void); +const BIO_METHOD *BIO_f_buffer(void); +const BIO_METHOD *BIO_f_readbuffer(void); +const BIO_METHOD *BIO_f_linebuffer(void); +const BIO_METHOD *BIO_f_nbio_test(void); +const BIO_METHOD *BIO_f_prefix(void); +const BIO_METHOD *BIO_s_core(void); +# ifndef OPENSSL_NO_DGRAM +const BIO_METHOD *BIO_s_dgram_pair(void); +const BIO_METHOD *BIO_s_datagram(void); +int BIO_dgram_non_fatal_error(int error); +BIO *BIO_new_dgram(int fd, int close_flag); +# ifndef OPENSSL_NO_SCTP +const BIO_METHOD *BIO_s_datagram_sctp(void); +BIO *BIO_new_dgram_sctp(int fd, int close_flag); +int BIO_dgram_is_sctp(BIO *bio); +int BIO_dgram_sctp_notification_cb(BIO *b, + BIO_dgram_sctp_notification_handler_fn handle_notifications, + void *context); +int BIO_dgram_sctp_wait_for_dry(BIO *b); +int BIO_dgram_sctp_msg_waiting(BIO *b); +# endif +# 
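/*
 * Editor's illustrative sketch (not part of the upstream header or this diff):
 * reading a file with the functions declared above (BIO_new_file(),
 * BIO_read_ex(), BIO_free()). The mode string, buffer size and helper name
 * are arbitrary illustrations.
 */
static int example_read_file(const char *path)
{
    BIO *in = BIO_new_file(path, "rb");
    unsigned char buf[512];
    size_t n = 0;

    if (in == NULL)
        return 0;
    while (BIO_read_ex(in, buf, sizeof(buf), &n) && n > 0) {
        /* ... consume n bytes from buf ... */
    }
    BIO_free(in);
    return 1;
}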
endif + +# ifndef OPENSSL_NO_SOCK +int BIO_sock_should_retry(int i); +int BIO_sock_non_fatal_error(int error); +int BIO_err_is_non_fatal(unsigned int errcode); +int BIO_socket_wait(int fd, int for_read, time_t max_time); +# endif +int BIO_wait(BIO *bio, time_t max_time, unsigned int nap_milliseconds); +int BIO_do_connect_retry(BIO *bio, int timeout, int nap_milliseconds); + +int BIO_fd_should_retry(int i); +int BIO_fd_non_fatal_error(int error); +int BIO_dump_cb(int (*cb) (const void *data, size_t len, void *u), + void *u, const void *s, int len); +int BIO_dump_indent_cb(int (*cb) (const void *data, size_t len, void *u), + void *u, const void *s, int len, int indent); +int BIO_dump(BIO *b, const void *bytes, int len); +int BIO_dump_indent(BIO *b, const void *bytes, int len, int indent); +# ifndef OPENSSL_NO_STDIO +int BIO_dump_fp(FILE *fp, const void *s, int len); +int BIO_dump_indent_fp(FILE *fp, const void *s, int len, int indent); +# endif +int BIO_hex_string(BIO *out, int indent, int width, const void *data, + int datalen); + +# ifndef OPENSSL_NO_SOCK +BIO_ADDR *BIO_ADDR_new(void); +int BIO_ADDR_copy(BIO_ADDR *dst, const BIO_ADDR *src); +BIO_ADDR *BIO_ADDR_dup(const BIO_ADDR *ap); +int BIO_ADDR_rawmake(BIO_ADDR *ap, int family, + const void *where, size_t wherelen, unsigned short port); +void BIO_ADDR_free(BIO_ADDR *); +void BIO_ADDR_clear(BIO_ADDR *ap); +int BIO_ADDR_family(const BIO_ADDR *ap); +int BIO_ADDR_rawaddress(const BIO_ADDR *ap, void *p, size_t *l); +unsigned short BIO_ADDR_rawport(const BIO_ADDR *ap); +char *BIO_ADDR_hostname_string(const BIO_ADDR *ap, int numeric); +char *BIO_ADDR_service_string(const BIO_ADDR *ap, int numeric); +char *BIO_ADDR_path_string(const BIO_ADDR *ap); + +const BIO_ADDRINFO *BIO_ADDRINFO_next(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_family(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_socktype(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_protocol(const BIO_ADDRINFO *bai); +const BIO_ADDR *BIO_ADDRINFO_address(const BIO_ADDRINFO *bai); +void BIO_ADDRINFO_free(BIO_ADDRINFO *bai); + +enum BIO_hostserv_priorities { + BIO_PARSE_PRIO_HOST, BIO_PARSE_PRIO_SERV +}; +int BIO_parse_hostserv(const char *hostserv, char **host, char **service, + enum BIO_hostserv_priorities hostserv_prio); +enum BIO_lookup_type { + BIO_LOOKUP_CLIENT, BIO_LOOKUP_SERVER +}; +int BIO_lookup(const char *host, const char *service, + enum BIO_lookup_type lookup_type, + int family, int socktype, BIO_ADDRINFO **res); +int BIO_lookup_ex(const char *host, const char *service, + int lookup_type, int family, int socktype, int protocol, + BIO_ADDRINFO **res); +int BIO_sock_error(int sock); +int BIO_socket_ioctl(int fd, long type, void *arg); +int BIO_socket_nbio(int fd, int mode); +int BIO_sock_init(void); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define BIO_sock_cleanup() while(0) continue +# endif +int BIO_set_tcp_ndelay(int sock, int turn_on); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 struct hostent *BIO_gethostbyname(const char *name); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_port(const char *str, unsigned short *port_ptr); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_host_ip(const char *str, unsigned char *ip); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_accept_socket(char *host_port, int mode); +OSSL_DEPRECATEDIN_1_1_0 int BIO_accept(int sock, char **ip_port); +# endif + +union BIO_sock_info_u { + BIO_ADDR *addr; +}; +enum BIO_sock_info_type { + BIO_SOCK_INFO_ADDRESS +}; +int BIO_sock_info(int sock, + enum BIO_sock_info_type type, union BIO_sock_info_u *info); + +# define 
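/*
 * Editor's illustrative sketch (not part of the upstream header or this diff):
 * resolving a host with BIO_lookup_ex() and connecting with
 * BIO_socket()/BIO_connect(), all declared above. AF_UNSPEC and SOCK_STREAM
 * come from the platform socket headers; host/port and the helper name are
 * placeholders.
 */
static int example_tcp_connect(const char *host, const char *port)
{
    BIO_ADDRINFO *res = NULL;
    const BIO_ADDRINFO *ai;
    int sock = -1;

    if (!BIO_lookup_ex(host, port, BIO_LOOKUP_CLIENT, AF_UNSPEC, SOCK_STREAM, 0, &res))
        return -1;
    for (ai = res; ai != NULL; ai = BIO_ADDRINFO_next(ai)) {
        sock = BIO_socket(BIO_ADDRINFO_family(ai), BIO_ADDRINFO_socktype(ai),
                          BIO_ADDRINFO_protocol(ai), 0);
        if (sock == -1)
            continue;
        if (BIO_connect(sock, BIO_ADDRINFO_address(ai), 0))
            break;                      /* connected */
        BIO_closesocket(sock);
        sock = -1;
    }
    BIO_ADDRINFO_free(res);
    return sock;                        /* -1 if no address worked */
}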
BIO_SOCK_REUSEADDR 0x01 +# define BIO_SOCK_V6_ONLY 0x02 +# define BIO_SOCK_KEEPALIVE 0x04 +# define BIO_SOCK_NONBLOCK 0x08 +# define BIO_SOCK_NODELAY 0x10 +# define BIO_SOCK_TFO 0x20 + +int BIO_socket(int domain, int socktype, int protocol, int options); +int BIO_connect(int sock, const BIO_ADDR *addr, int options); +int BIO_bind(int sock, const BIO_ADDR *addr, int options); +int BIO_listen(int sock, const BIO_ADDR *addr, int options); +int BIO_accept_ex(int accept_sock, BIO_ADDR *addr, int options); +int BIO_closesocket(int sock); + +BIO *BIO_new_socket(int sock, int close_flag); +BIO *BIO_new_connect(const char *host_port); +BIO *BIO_new_accept(const char *host_port); +# endif /* OPENSSL_NO_SOCK*/ + +BIO *BIO_new_fd(int fd, int close_flag); + +int BIO_new_bio_pair(BIO **bio1, size_t writebuf1, + BIO **bio2, size_t writebuf2); +# ifndef OPENSSL_NO_DGRAM +int BIO_new_bio_dgram_pair(BIO **bio1, size_t writebuf1, + BIO **bio2, size_t writebuf2); +# endif + +/* + * If successful, returns 1 and in *bio1, *bio2 two BIO pair endpoints. + * Otherwise returns 0 and sets *bio1 and *bio2 to NULL. Size 0 uses default + * value. + */ + +void BIO_copy_next_retry(BIO *b); + +/* + * long BIO_ghbn_ctrl(int cmd,int iarg,char *parg); + */ + +# define ossl_bio__attr__(x) +# if defined(__GNUC__) && defined(__STDC_VERSION__) \ + && !defined(__MINGW32__) && !defined(__MINGW64__) \ + && !defined(__APPLE__) + /* + * Because we support the 'z' modifier, which made its appearance in C99, + * we can't use __attribute__ with pre C99 dialects. + */ +# if __STDC_VERSION__ >= 199901L +# undef ossl_bio__attr__ +# define ossl_bio__attr__ __attribute__ +# if __GNUC__*10 + __GNUC_MINOR__ >= 44 +# define ossl_bio__printf__ __gnu_printf__ +# else +# define ossl_bio__printf__ __printf__ +# endif +# endif +# endif +int BIO_printf(BIO *bio, const char *format, ...) +ossl_bio__attr__((__format__(ossl_bio__printf__, 2, 3))); +int BIO_vprintf(BIO *bio, const char *format, va_list args) +ossl_bio__attr__((__format__(ossl_bio__printf__, 2, 0))); +int BIO_snprintf(char *buf, size_t n, const char *format, ...) 
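/*
 * Editor's illustrative sketch (not part of the upstream header or this diff):
 * a BIO pair as described in the comment above - bytes written to one
 * endpoint become readable from the other; write-buffer sizes of 0 select
 * the default. The payload and helper name are illustrative only.
 */
static void example_bio_pair(void)
{
    BIO *b1 = NULL, *b2 = NULL;
    char buf[16];
    size_t n = 0;

    if (BIO_new_bio_pair(&b1, 0, &b2, 0)) {
        BIO_printf(b1, "ping %d\n", 1);          /* queue data on one end   */
        BIO_read_ex(b2, buf, sizeof(buf), &n);   /* drain it from the other */
        BIO_free(b1);
        BIO_free(b2);
    }
}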
+ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 4))); +int BIO_vsnprintf(char *buf, size_t n, const char *format, va_list args) +ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 0))); +# undef ossl_bio__attr__ +# undef ossl_bio__printf__ + + +BIO_METHOD *BIO_meth_new(int type, const char *name); +void BIO_meth_free(BIO_METHOD *biom); +int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *, int); +int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *, size_t, + size_t *); +int BIO_meth_set_write(BIO_METHOD *biom, + int (*write) (BIO *, const char *, int)); +int BIO_meth_set_write_ex(BIO_METHOD *biom, + int (*bwrite) (BIO *, const char *, size_t, size_t *)); +int BIO_meth_set_sendmmsg(BIO_METHOD *biom, + int (*f) (BIO *, BIO_MSG *, size_t, size_t, + uint64_t, size_t *)); +int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int); +int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *, size_t, size_t *); +int BIO_meth_set_read(BIO_METHOD *biom, + int (*read) (BIO *, char *, int)); +int BIO_meth_set_read_ex(BIO_METHOD *biom, + int (*bread) (BIO *, char *, size_t, size_t *)); +int BIO_meth_set_recvmmsg(BIO_METHOD *biom, + int (*f) (BIO *, BIO_MSG *, size_t, size_t, + uint64_t, size_t *)); +int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *); +int BIO_meth_set_puts(BIO_METHOD *biom, + int (*puts) (BIO *, const char *)); +int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int); +int BIO_meth_set_gets(BIO_METHOD *biom, + int (*ossl_gets) (BIO *, char *, int)); +long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int, long, void *); +int BIO_meth_set_ctrl(BIO_METHOD *biom, + long (*ctrl) (BIO *, int, long, void *)); +int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *); +int BIO_meth_set_create(BIO_METHOD *biom, int (*create) (BIO *)); +int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *); +int BIO_meth_set_destroy(BIO_METHOD *biom, int (*destroy) (BIO *)); +long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom)) + (BIO *, int, BIO_info_cb *); +int BIO_meth_set_callback_ctrl(BIO_METHOD *biom, + long (*callback_ctrl) (BIO *, int, + BIO_info_cb *)); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h new file mode 100644 index 00000000000..60beffd57ef --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h @@ -0,0 +1,629 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/cmp.h.in + * + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright Nokia 2007-2019 + * Copyright Siemens AG 2015-2019 + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CMP_H +# define OPENSSL_CMP_H + +# include +# ifndef OPENSSL_NO_CMP + +# include +# include +# include +# include + +/* explicit #includes not strictly needed since implied by the above: */ +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# define OSSL_CMP_PVNO_2 2 +# define OSSL_CMP_PVNO_3 3 +# define OSSL_CMP_PVNO OSSL_CMP_PVNO_2 /* v2 is the default */ + +/*- + * PKIFailureInfo ::= BIT STRING { + * -- since we can fail in more than one way! + * -- More codes may be added in the future if/when required. + * badAlg (0), + * -- unrecognized or unsupported Algorithm Identifier + * badMessageCheck (1), + * -- integrity check failed (e.g., signature did not verify) + * badRequest (2), + * -- transaction not permitted or supported + * badTime (3), + * -- messageTime was not sufficiently close to the system time, + * -- as defined by local policy + * badCertId (4), + * -- no certificate could be found matching the provided criteria + * badDataFormat (5), + * -- the data submitted has the wrong format + * wrongAuthority (6), + * -- the authority indicated in the request is different from the + * -- one creating the response token + * incorrectData (7), + * -- the requester's data is incorrect (for notary services) + * missingTimeStamp (8), + * -- when the timestamp is missing but should be there + * -- (by policy) + * badPOP (9), + * -- the proof-of-possession failed + * certRevoked (10), + * -- the certificate has already been revoked + * certConfirmed (11), + * -- the certificate has already been confirmed + * wrongIntegrity (12), + * -- invalid integrity, password based instead of signature or + * -- vice versa + * badRecipientNonce (13), + * -- invalid recipient nonce, either missing or wrong value + * timeNotAvailable (14), + * -- the TSA's time source is not available + * unacceptedPolicy (15), + * -- the requested TSA policy is not supported by the TSA. + * unacceptedExtension (16), + * -- the requested extension is not supported by the TSA. + * addInfoNotAvailable (17), + * -- the additional information requested could not be + * -- understood or is not available + * badSenderNonce (18), + * -- invalid sender nonce, either missing or wrong size + * badCertTemplate (19), + * -- invalid cert. 
template or missing mandatory information + * signerNotTrusted (20), + * -- signer of the message unknown or not trusted + * transactionIdInUse (21), + * -- the transaction identifier is already in use + * unsupportedVersion (22), + * -- the version of the message is not supported + * notAuthorized (23), + * -- the sender was not authorized to make the preceding + * -- request or perform the preceding action + * systemUnavail (24), + * -- the request cannot be handled due to system unavailability + * systemFailure (25), + * -- the request cannot be handled due to system failure + * duplicateCertReq (26) + * -- certificate cannot be issued because a duplicate + * -- certificate already exists + * } + */ +# define OSSL_CMP_PKIFAILUREINFO_badAlg 0 +# define OSSL_CMP_PKIFAILUREINFO_badMessageCheck 1 +# define OSSL_CMP_PKIFAILUREINFO_badRequest 2 +# define OSSL_CMP_PKIFAILUREINFO_badTime 3 +# define OSSL_CMP_PKIFAILUREINFO_badCertId 4 +# define OSSL_CMP_PKIFAILUREINFO_badDataFormat 5 +# define OSSL_CMP_PKIFAILUREINFO_wrongAuthority 6 +# define OSSL_CMP_PKIFAILUREINFO_incorrectData 7 +# define OSSL_CMP_PKIFAILUREINFO_missingTimeStamp 8 +# define OSSL_CMP_PKIFAILUREINFO_badPOP 9 +# define OSSL_CMP_PKIFAILUREINFO_certRevoked 10 +# define OSSL_CMP_PKIFAILUREINFO_certConfirmed 11 +# define OSSL_CMP_PKIFAILUREINFO_wrongIntegrity 12 +# define OSSL_CMP_PKIFAILUREINFO_badRecipientNonce 13 +# define OSSL_CMP_PKIFAILUREINFO_timeNotAvailable 14 +# define OSSL_CMP_PKIFAILUREINFO_unacceptedPolicy 15 +# define OSSL_CMP_PKIFAILUREINFO_unacceptedExtension 16 +# define OSSL_CMP_PKIFAILUREINFO_addInfoNotAvailable 17 +# define OSSL_CMP_PKIFAILUREINFO_badSenderNonce 18 +# define OSSL_CMP_PKIFAILUREINFO_badCertTemplate 19 +# define OSSL_CMP_PKIFAILUREINFO_signerNotTrusted 20 +# define OSSL_CMP_PKIFAILUREINFO_transactionIdInUse 21 +# define OSSL_CMP_PKIFAILUREINFO_unsupportedVersion 22 +# define OSSL_CMP_PKIFAILUREINFO_notAuthorized 23 +# define OSSL_CMP_PKIFAILUREINFO_systemUnavail 24 +# define OSSL_CMP_PKIFAILUREINFO_systemFailure 25 +# define OSSL_CMP_PKIFAILUREINFO_duplicateCertReq 26 +# define OSSL_CMP_PKIFAILUREINFO_MAX 26 +# define OSSL_CMP_PKIFAILUREINFO_MAX_BIT_PATTERN \ + ((1 << (OSSL_CMP_PKIFAILUREINFO_MAX + 1)) - 1) +# if OSSL_CMP_PKIFAILUREINFO_MAX_BIT_PATTERN > INT_MAX +# error CMP_PKIFAILUREINFO_MAX bit pattern does not fit in type int +# endif +typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; + +# define OSSL_CMP_CTX_FAILINFO_badAlg (1 << 0) +# define OSSL_CMP_CTX_FAILINFO_badMessageCheck (1 << 1) +# define OSSL_CMP_CTX_FAILINFO_badRequest (1 << 2) +# define OSSL_CMP_CTX_FAILINFO_badTime (1 << 3) +# define OSSL_CMP_CTX_FAILINFO_badCertId (1 << 4) +# define OSSL_CMP_CTX_FAILINFO_badDataFormat (1 << 5) +# define OSSL_CMP_CTX_FAILINFO_wrongAuthority (1 << 6) +# define OSSL_CMP_CTX_FAILINFO_incorrectData (1 << 7) +# define OSSL_CMP_CTX_FAILINFO_missingTimeStamp (1 << 8) +# define OSSL_CMP_CTX_FAILINFO_badPOP (1 << 9) +# define OSSL_CMP_CTX_FAILINFO_certRevoked (1 << 10) +# define OSSL_CMP_CTX_FAILINFO_certConfirmed (1 << 11) +# define OSSL_CMP_CTX_FAILINFO_wrongIntegrity (1 << 12) +# define OSSL_CMP_CTX_FAILINFO_badRecipientNonce (1 << 13) +# define OSSL_CMP_CTX_FAILINFO_timeNotAvailable (1 << 14) +# define OSSL_CMP_CTX_FAILINFO_unacceptedPolicy (1 << 15) +# define OSSL_CMP_CTX_FAILINFO_unacceptedExtension (1 << 16) +# define OSSL_CMP_CTX_FAILINFO_addInfoNotAvailable (1 << 17) +# define OSSL_CMP_CTX_FAILINFO_badSenderNonce (1 << 18) +# define OSSL_CMP_CTX_FAILINFO_badCertTemplate (1 << 19) +# define 
OSSL_CMP_CTX_FAILINFO_signerNotTrusted (1 << 20) +# define OSSL_CMP_CTX_FAILINFO_transactionIdInUse (1 << 21) +# define OSSL_CMP_CTX_FAILINFO_unsupportedVersion (1 << 22) +# define OSSL_CMP_CTX_FAILINFO_notAuthorized (1 << 23) +# define OSSL_CMP_CTX_FAILINFO_systemUnavail (1 << 24) +# define OSSL_CMP_CTX_FAILINFO_systemFailure (1 << 25) +# define OSSL_CMP_CTX_FAILINFO_duplicateCertReq (1 << 26) + +/*- + * PKIStatus ::= INTEGER { + * accepted (0), + * -- you got exactly what you asked for + * grantedWithMods (1), + * -- you got something like what you asked for; the + * -- requester is responsible for ascertaining the differences + * rejection (2), + * -- you don't get it, more information elsewhere in the message + * waiting (3), + * -- the request body part has not yet been processed; expect to + * -- hear more later (note: proper handling of this status + * -- response MAY use the polling req/rep PKIMessages specified + * -- in Section 5.3.22; alternatively, polling in the underlying + * -- transport layer MAY have some utility in this regard) + * revocationWarning (4), + * -- this message contains a warning that a revocation is + * -- imminent + * revocationNotification (5), + * -- notification that a revocation has occurred + * keyUpdateWarning (6) + * -- update already done for the oldCertId specified in + * -- CertReqMsg + * } + */ +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_revocationNotification 5 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; + +DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) + +# define OSSL_CMP_CERTORENCCERT_CERTIFICATE 0 +# define OSSL_CMP_CERTORENCCERT_ENCRYPTEDCERT 1 + +/* data type declarations */ +typedef struct ossl_cmp_ctx_st OSSL_CMP_CTX; +typedef struct ossl_cmp_pkiheader_st OSSL_CMP_PKIHEADER; +DECLARE_ASN1_FUNCTIONS(OSSL_CMP_PKIHEADER) +typedef struct ossl_cmp_msg_st OSSL_CMP_MSG; +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_MSG) +DECLARE_ASN1_ENCODE_FUNCTIONS(OSSL_CMP_MSG, OSSL_CMP_MSG, OSSL_CMP_MSG) +typedef struct ossl_cmp_certstatus_st OSSL_CMP_CERTSTATUS; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTSTATUS, OSSL_CMP_CERTSTATUS, OSSL_CMP_CERTSTATUS) +#define sk_OSSL_CMP_CERTSTATUS_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_value(sk, idx) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTSTATUS_new(cmp) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTSTATUS_new_null() ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTSTATUS_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTSTATUS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTSTATUS_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_delete(sk, i) ((OSSL_CMP_CERTSTATUS 
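/*
 * Editor's illustrative sketch (not part of the upstream header or this diff):
 * the OSSL_CMP_CTX_FAILINFO_* values above are single bits, so a failure-info
 * word is examined with bitwise tests. 'fail_info' is simply an int holding
 * such a bit pattern; which bits count as retryable is an illustrative policy.
 */
static int example_is_retryable(int fail_info)
{
    return (fail_info & (OSSL_CMP_CTX_FAILINFO_systemUnavail
                         | OSSL_CMP_CTX_FAILINFO_timeNotAvailable)) != 0;
}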
*)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTSTATUS_delete_ptr(sk, ptr) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr))) +#define sk_OSSL_CMP_CERTSTATUS_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_pop(sk) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_shift(sk) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk),ossl_check_OSSL_CMP_CERTSTATUS_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTSTATUS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTSTATUS_set(sk, idx, ptr) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr))) +#define sk_OSSL_CMP_CERTSTATUS_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr), pnum) +#define sk_OSSL_CMP_CERTSTATUS_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_dup(sk) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CERTSTATUS_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTSTATUS_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTSTATUS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp))) + +typedef struct ossl_cmp_itav_st OSSL_CMP_ITAV; +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_ITAV) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_ITAV, OSSL_CMP_ITAV, OSSL_CMP_ITAV) +#define sk_OSSL_CMP_ITAV_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_value(sk, idx) ((OSSL_CMP_ITAV *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk), (idx))) +#define sk_OSSL_CMP_ITAV_new(cmp) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp))) +#define sk_OSSL_CMP_ITAV_new_null() ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_ITAV_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_ITAV_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (n)) +#define sk_OSSL_CMP_ITAV_free(sk) 
OPENSSL_sk_free(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_delete(sk, i) ((OSSL_CMP_ITAV *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (i))) +#define sk_OSSL_CMP_ITAV_delete_ptr(sk, ptr) ((OSSL_CMP_ITAV *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr))) +#define sk_OSSL_CMP_ITAV_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_pop(sk) ((OSSL_CMP_ITAV *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_shift(sk) ((OSSL_CMP_ITAV *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_ITAV_sk_type(sk),ossl_check_OSSL_CMP_ITAV_freefunc_type(freefunc)) +#define sk_OSSL_CMP_ITAV_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr), (idx)) +#define sk_OSSL_CMP_ITAV_set(sk, idx, ptr) ((OSSL_CMP_ITAV *)OPENSSL_sk_set(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (idx), ossl_check_OSSL_CMP_ITAV_type(ptr))) +#define sk_OSSL_CMP_ITAV_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr), pnum) +#define sk_OSSL_CMP_ITAV_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_dup(sk) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_ITAV_freefunc_type(freefunc))) +#define sk_OSSL_CMP_ITAV_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_ITAV_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp))) + +typedef struct ossl_cmp_revrepcontent_st OSSL_CMP_REVREPCONTENT; +typedef struct ossl_cmp_pkisi_st OSSL_CMP_PKISI; +DECLARE_ASN1_FUNCTIONS(OSSL_CMP_PKISI) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_PKISI) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_PKISI, OSSL_CMP_PKISI, OSSL_CMP_PKISI) +#define sk_OSSL_CMP_PKISI_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_value(sk, idx) ((OSSL_CMP_PKISI *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk), (idx))) +#define sk_OSSL_CMP_PKISI_new(cmp) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp))) +#define sk_OSSL_CMP_PKISI_new_null() ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_PKISI_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_PKISI_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (n)) +#define sk_OSSL_CMP_PKISI_free(sk) 
OPENSSL_sk_free(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_delete(sk, i) ((OSSL_CMP_PKISI *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (i))) +#define sk_OSSL_CMP_PKISI_delete_ptr(sk, ptr) ((OSSL_CMP_PKISI *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr))) +#define sk_OSSL_CMP_PKISI_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_pop(sk) ((OSSL_CMP_PKISI *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_shift(sk) ((OSSL_CMP_PKISI *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_PKISI_sk_type(sk),ossl_check_OSSL_CMP_PKISI_freefunc_type(freefunc)) +#define sk_OSSL_CMP_PKISI_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr), (idx)) +#define sk_OSSL_CMP_PKISI_set(sk, idx, ptr) ((OSSL_CMP_PKISI *)OPENSSL_sk_set(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (idx), ossl_check_OSSL_CMP_PKISI_type(ptr))) +#define sk_OSSL_CMP_PKISI_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr), pnum) +#define sk_OSSL_CMP_PKISI_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_dup(sk) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_PKISI_freefunc_type(freefunc))) +#define sk_OSSL_CMP_PKISI_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_PKISI_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp))) + +typedef struct ossl_cmp_certrepmessage_st OSSL_CMP_CERTREPMESSAGE; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTREPMESSAGE, OSSL_CMP_CERTREPMESSAGE, OSSL_CMP_CERTREPMESSAGE) +#define sk_OSSL_CMP_CERTREPMESSAGE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_value(sk, idx) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTREPMESSAGE_new(cmp) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTREPMESSAGE_new_null() ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTREPMESSAGE_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTREPMESSAGE_reserve(sk, n) 
OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTREPMESSAGE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_delete(sk, i) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTREPMESSAGE_delete_ptr(sk, ptr) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr))) +#define sk_OSSL_CMP_CERTREPMESSAGE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_pop(sk) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_shift(sk) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk),ossl_check_OSSL_CMP_CERTREPMESSAGE_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTREPMESSAGE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTREPMESSAGE_set(sk, idx, ptr) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr))) +#define sk_OSSL_CMP_CERTREPMESSAGE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr), pnum) +#define sk_OSSL_CMP_CERTREPMESSAGE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_dup(sk) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CERTREPMESSAGE_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTREPMESSAGE_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTREPMESSAGE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp))) + +typedef struct ossl_cmp_pollrep_st OSSL_CMP_POLLREP; +typedef STACK_OF(OSSL_CMP_POLLREP) OSSL_CMP_POLLREPCONTENT; +typedef struct ossl_cmp_certresponse_st OSSL_CMP_CERTRESPONSE; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTRESPONSE, OSSL_CMP_CERTRESPONSE, OSSL_CMP_CERTRESPONSE) +#define sk_OSSL_CMP_CERTRESPONSE_num(sk) 
OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_value(sk, idx) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTRESPONSE_new(cmp) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTRESPONSE_new_null() ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTRESPONSE_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTRESPONSE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTRESPONSE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_delete(sk, i) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTRESPONSE_delete_ptr(sk, ptr) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr))) +#define sk_OSSL_CMP_CERTRESPONSE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_pop(sk) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_shift(sk) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk),ossl_check_OSSL_CMP_CERTRESPONSE_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTRESPONSE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTRESPONSE_set(sk, idx, ptr) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr))) +#define sk_OSSL_CMP_CERTRESPONSE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr), pnum) +#define sk_OSSL_CMP_CERTRESPONSE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_dup(sk) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_copyfunc_type(copyfunc), 
ossl_check_OSSL_CMP_CERTRESPONSE_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTRESPONSE_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTRESPONSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp))) + +typedef STACK_OF(ASN1_UTF8STRING) OSSL_CMP_PKIFREETEXT; + +/* + * function DECLARATIONS + */ + +/* from cmp_asn.c */ +OSSL_CMP_ITAV *OSSL_CMP_ITAV_create(ASN1_OBJECT *type, ASN1_TYPE *value); +void OSSL_CMP_ITAV_set0(OSSL_CMP_ITAV *itav, ASN1_OBJECT *type, + ASN1_TYPE *value); +ASN1_OBJECT *OSSL_CMP_ITAV_get0_type(const OSSL_CMP_ITAV *itav); +ASN1_TYPE *OSSL_CMP_ITAV_get0_value(const OSSL_CMP_ITAV *itav); +int OSSL_CMP_ITAV_push0_stack_item(STACK_OF(OSSL_CMP_ITAV) **itav_sk_p, + OSSL_CMP_ITAV *itav); +void OSSL_CMP_ITAV_free(OSSL_CMP_ITAV *itav); + +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_caCerts(const STACK_OF(X509) *caCerts); +int OSSL_CMP_ITAV_get0_caCerts(const OSSL_CMP_ITAV *itav, STACK_OF(X509) **out); + +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_rootCaCert(const X509 *rootCaCert); +int OSSL_CMP_ITAV_get0_rootCaCert(const OSSL_CMP_ITAV *itav, X509 **out); +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_rootCaKeyUpdate(const X509 *newWithNew, + const X509 *newWithOld, + const X509 *oldWithNew); +int OSSL_CMP_ITAV_get0_rootCaKeyUpdate(const OSSL_CMP_ITAV *itav, + X509 **newWithNew, + X509 **newWithOld, + X509 **oldWithNew); + +void OSSL_CMP_MSG_free(OSSL_CMP_MSG *msg); + +/* from cmp_ctx.c */ +OSSL_CMP_CTX *OSSL_CMP_CTX_new(OSSL_LIB_CTX *libctx, const char *propq); +void OSSL_CMP_CTX_free(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_reinit(OSSL_CMP_CTX *ctx); +OSSL_LIB_CTX *OSSL_CMP_CTX_get0_libctx(const OSSL_CMP_CTX *ctx); +const char *OSSL_CMP_CTX_get0_propq(const OSSL_CMP_CTX *ctx); +/* CMP general options: */ +# define OSSL_CMP_OPT_LOG_VERBOSITY 0 +/* CMP transfer options: */ +# define OSSL_CMP_OPT_KEEP_ALIVE 10 +# define OSSL_CMP_OPT_MSG_TIMEOUT 11 +# define OSSL_CMP_OPT_TOTAL_TIMEOUT 12 +# define OSSL_CMP_OPT_USE_TLS 13 +/* CMP request options: */ +# define OSSL_CMP_OPT_VALIDITY_DAYS 20 +# define OSSL_CMP_OPT_SUBJECTALTNAME_NODEFAULT 21 +# define OSSL_CMP_OPT_SUBJECTALTNAME_CRITICAL 22 +# define OSSL_CMP_OPT_POLICIES_CRITICAL 23 +# define OSSL_CMP_OPT_POPO_METHOD 24 +# define OSSL_CMP_OPT_IMPLICIT_CONFIRM 25 +# define OSSL_CMP_OPT_DISABLE_CONFIRM 26 +# define OSSL_CMP_OPT_REVOCATION_REASON 27 +/* CMP protection options: */ +# define OSSL_CMP_OPT_UNPROTECTED_SEND 30 +# define OSSL_CMP_OPT_UNPROTECTED_ERRORS 31 +# define OSSL_CMP_OPT_OWF_ALGNID 32 +# define OSSL_CMP_OPT_MAC_ALGNID 33 +# define OSSL_CMP_OPT_DIGEST_ALGNID 34 +# define OSSL_CMP_OPT_IGNORE_KEYUSAGE 35 +# define OSSL_CMP_OPT_PERMIT_TA_IN_EXTRACERTS_FOR_IR 36 +int OSSL_CMP_CTX_set_option(OSSL_CMP_CTX *ctx, int opt, int val); +int OSSL_CMP_CTX_get_option(const OSSL_CMP_CTX *ctx, int opt); +/* CMP-specific callback for logging and outputting the error queue: */ +int OSSL_CMP_CTX_set_log_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_log_cb_t cb); +# define OSSL_CMP_CTX_set_log_verbosity(ctx, level) \ + OSSL_CMP_CTX_set_option(ctx, OSSL_CMP_OPT_LOG_VERBOSITY, level) +void OSSL_CMP_CTX_print_errors(const OSSL_CMP_CTX *ctx); +/* message transfer: */ +int OSSL_CMP_CTX_set1_serverPath(OSSL_CMP_CTX *ctx, const char *path); +int OSSL_CMP_CTX_set1_server(OSSL_CMP_CTX *ctx, const char *address); +int OSSL_CMP_CTX_set_serverPort(OSSL_CMP_CTX *ctx, int port); +int OSSL_CMP_CTX_set1_proxy(OSSL_CMP_CTX *ctx, const char *name); +int OSSL_CMP_CTX_set1_no_proxy(OSSL_CMP_CTX *ctx, const char *names); +# ifndef 
OPENSSL_NO_HTTP +int OSSL_CMP_CTX_set_http_cb(OSSL_CMP_CTX *ctx, OSSL_HTTP_bio_cb_t cb); +int OSSL_CMP_CTX_set_http_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_http_cb_arg(const OSSL_CMP_CTX *ctx); +# endif +typedef OSSL_CMP_MSG *(*OSSL_CMP_transfer_cb_t) (OSSL_CMP_CTX *ctx, + const OSSL_CMP_MSG *req); +int OSSL_CMP_CTX_set_transfer_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_transfer_cb_t cb); +int OSSL_CMP_CTX_set_transfer_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_transfer_cb_arg(const OSSL_CMP_CTX *ctx); +/* server authentication: */ +int OSSL_CMP_CTX_set1_srvCert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_set1_expected_sender(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_set0_trustedStore(OSSL_CMP_CTX *ctx, X509_STORE *store); +# define OSSL_CMP_CTX_set0_trusted OSSL_CMP_CTX_set0_trustedStore +X509_STORE *OSSL_CMP_CTX_get0_trustedStore(const OSSL_CMP_CTX *ctx); +# define OSSL_CMP_CTX_get0_trusted OSSL_CMP_CTX_get0_trustedStore +int OSSL_CMP_CTX_set1_untrusted(OSSL_CMP_CTX *ctx, STACK_OF(X509) *certs); +STACK_OF(X509) *OSSL_CMP_CTX_get0_untrusted(const OSSL_CMP_CTX *ctx); +/* client authentication: */ +int OSSL_CMP_CTX_set1_cert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, + STACK_OF(X509) *candidates); +int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); +int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, + const unsigned char *ref, int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); +/* CMP message header and extra certificates: */ +int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, + STACK_OF(X509) *extraCertsOut); +/* certificate template: */ +int OSSL_CMP_CTX_set0_newPkey(OSSL_CMP_CTX *ctx, int priv, EVP_PKEY *pkey); +EVP_PKEY *OSSL_CMP_CTX_get0_newPkey(const OSSL_CMP_CTX *ctx, int priv); +int OSSL_CMP_CTX_set1_issuer(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_set1_serialNumber(OSSL_CMP_CTX *ctx, const ASN1_INTEGER *sn); +int OSSL_CMP_CTX_set1_subjectName(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_push1_subjectAltName(OSSL_CMP_CTX *ctx, + const GENERAL_NAME *name); +int OSSL_CMP_CTX_set0_reqExtensions(OSSL_CMP_CTX *ctx, X509_EXTENSIONS *exts); +int OSSL_CMP_CTX_reqExtensions_have_SAN(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_push0_policy(OSSL_CMP_CTX *ctx, POLICYINFO *pinfo); +int OSSL_CMP_CTX_set1_oldCert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_set1_p10CSR(OSSL_CMP_CTX *ctx, const X509_REQ *csr); +/* misc body contents: */ +int OSSL_CMP_CTX_push0_genm_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +/* certificate confirmation: */ +typedef int (*OSSL_CMP_certConf_cb_t) (OSSL_CMP_CTX *ctx, X509 *cert, + int fail_info, const char **txt); +int OSSL_CMP_certConf_cb(OSSL_CMP_CTX *ctx, X509 *cert, int fail_info, + const char **text); +int OSSL_CMP_CTX_set_certConf_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_certConf_cb_t cb); +int OSSL_CMP_CTX_set_certConf_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_certConf_cb_arg(const OSSL_CMP_CTX *ctx); +/* result fetching: */ +int OSSL_CMP_CTX_get_status(const OSSL_CMP_CTX *ctx); +OSSL_CMP_PKIFREETEXT *OSSL_CMP_CTX_get0_statusString(const OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_get_failInfoCode(const OSSL_CMP_CTX 
*ctx); +# define OSSL_CMP_PKISI_BUFLEN 1024 +X509 *OSSL_CMP_CTX_get0_validatedSrvCert(const OSSL_CMP_CTX *ctx); +X509 *OSSL_CMP_CTX_get0_newCert(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_newChain(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_caPubs(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_extraCertsIn(const OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_set1_transactionID(OSSL_CMP_CTX *ctx, + const ASN1_OCTET_STRING *id); +int OSSL_CMP_CTX_set1_senderNonce(OSSL_CMP_CTX *ctx, + const ASN1_OCTET_STRING *nonce); + +/* from cmp_status.c */ +char *OSSL_CMP_CTX_snprint_PKIStatus(const OSSL_CMP_CTX *ctx, char *buf, + size_t bufsize); +char *OSSL_CMP_snprint_PKIStatusInfo(const OSSL_CMP_PKISI *statusInfo, + char *buf, size_t bufsize); +OSSL_CMP_PKISI * +OSSL_CMP_STATUSINFO_new(int status, int fail_info, const char *text); + +/* from cmp_hdr.c */ +ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_transactionID(const + OSSL_CMP_PKIHEADER *hdr); +ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); + +/* from cmp_msg.c */ +OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); +OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, + const char *propq); +int OSSL_CMP_MSG_write(const char *file, const OSSL_CMP_MSG *msg); +OSSL_CMP_MSG *d2i_OSSL_CMP_MSG_bio(BIO *bio, OSSL_CMP_MSG **msg); +int i2d_OSSL_CMP_MSG_bio(BIO *bio, const OSSL_CMP_MSG *msg); + +/* from cmp_vfy.c */ +int OSSL_CMP_validate_msg(OSSL_CMP_CTX *ctx, const OSSL_CMP_MSG *msg); +int OSSL_CMP_validate_cert_path(const OSSL_CMP_CTX *ctx, + X509_STORE *trusted_store, X509 *cert); + +/* from cmp_http.c */ +# ifndef OPENSSL_NO_HTTP +OSSL_CMP_MSG *OSSL_CMP_MSG_http_perform(OSSL_CMP_CTX *ctx, + const OSSL_CMP_MSG *req); +# endif + +/* from cmp_server.c */ +typedef struct ossl_cmp_srv_ctx_st OSSL_CMP_SRV_CTX; +OSSL_CMP_MSG *OSSL_CMP_SRV_process_request(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req); +OSSL_CMP_MSG * OSSL_CMP_CTX_server_perform(OSSL_CMP_CTX *client_ctx, + const OSSL_CMP_MSG *req); +OSSL_CMP_SRV_CTX *OSSL_CMP_SRV_CTX_new(OSSL_LIB_CTX *libctx, const char *propq); +void OSSL_CMP_SRV_CTX_free(OSSL_CMP_SRV_CTX *srv_ctx); +typedef OSSL_CMP_PKISI *(*OSSL_CMP_SRV_cert_request_cb_t) + (OSSL_CMP_SRV_CTX *srv_ctx, const OSSL_CMP_MSG *req, int certReqId, + const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr, + X509 **certOut, STACK_OF(X509) **chainOut, STACK_OF(X509) **caPubs); +typedef OSSL_CMP_PKISI *(*OSSL_CMP_SRV_rr_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const X509_NAME *issuer, + const ASN1_INTEGER *serial); +typedef int (*OSSL_CMP_SRV_genm_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const STACK_OF(OSSL_CMP_ITAV) *in, + STACK_OF(OSSL_CMP_ITAV) **out); +typedef void (*OSSL_CMP_SRV_error_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const OSSL_CMP_PKISI *statusInfo, + const ASN1_INTEGER *errorCode, + const OSSL_CMP_PKIFREETEXT *errDetails); +typedef int (*OSSL_CMP_SRV_certConf_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + int certReqId, + const ASN1_OCTET_STRING *certHash, + const OSSL_CMP_PKISI *si); +typedef int (*OSSL_CMP_SRV_pollReq_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG 
*req, int certReqId, + OSSL_CMP_MSG **certReq, + int64_t *check_after); +int OSSL_CMP_SRV_CTX_init(OSSL_CMP_SRV_CTX *srv_ctx, void *custom_ctx, + OSSL_CMP_SRV_cert_request_cb_t process_cert_request, + OSSL_CMP_SRV_rr_cb_t process_rr, + OSSL_CMP_SRV_genm_cb_t process_genm, + OSSL_CMP_SRV_error_cb_t process_error, + OSSL_CMP_SRV_certConf_cb_t process_certConf, + OSSL_CMP_SRV_pollReq_cb_t process_pollReq); +OSSL_CMP_CTX *OSSL_CMP_SRV_CTX_get0_cmp_ctx(const OSSL_CMP_SRV_CTX *srv_ctx); +void *OSSL_CMP_SRV_CTX_get0_custom_ctx(const OSSL_CMP_SRV_CTX *srv_ctx); +int OSSL_CMP_SRV_CTX_set_send_unprotected_errors(OSSL_CMP_SRV_CTX *srv_ctx, + int val); +int OSSL_CMP_SRV_CTX_set_accept_unprotected(OSSL_CMP_SRV_CTX *srv_ctx, int val); +int OSSL_CMP_SRV_CTX_set_accept_raverified(OSSL_CMP_SRV_CTX *srv_ctx, int val); +int OSSL_CMP_SRV_CTX_set_grant_implicit_confirm(OSSL_CMP_SRV_CTX *srv_ctx, + int val); + +/* from cmp_client.c */ +X509 *OSSL_CMP_exec_certreq(OSSL_CMP_CTX *ctx, int req_type, + const OSSL_CRMF_MSG *crm); +# define OSSL_CMP_IR 0 +# define OSSL_CMP_CR 2 +# define OSSL_CMP_P10CR 4 +# define OSSL_CMP_KUR 7 +# define OSSL_CMP_exec_IR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_IR, NULL) +# define OSSL_CMP_exec_CR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_CR, NULL) +# define OSSL_CMP_exec_P10CR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_P10CR, NULL) +# define OSSL_CMP_exec_KUR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_KUR, NULL) +int OSSL_CMP_try_certreq(OSSL_CMP_CTX *ctx, int req_type, + const OSSL_CRMF_MSG *crm, int *checkAfter); +int OSSL_CMP_exec_RR_ses(OSSL_CMP_CTX *ctx); +STACK_OF(OSSL_CMP_ITAV) *OSSL_CMP_exec_GENM_ses(OSSL_CMP_CTX *ctx); + +/* from cmp_genm.c */ +int OSSL_CMP_get1_caCerts(OSSL_CMP_CTX *ctx, STACK_OF(X509) **out); +int OSSL_CMP_get1_rootCaKeyUpdate(OSSL_CMP_CTX *ctx, + const X509 *oldWithOld, X509 **newWithNew, + X509 **newWithOld, X509 **oldWithNew); + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_CMP) */ +#endif /* !defined(OPENSSL_CMP_H) */ diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/cms.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cms.h new file mode 100644 index 00000000000..fe86a5c7da2 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cms.h @@ -0,0 +1,508 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/cms.h.in + * + * Copyright 2008-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CMS_H +# define OPENSSL_CMS_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CMS_H +# endif + +# include + +# ifndef OPENSSL_NO_CMS +# include +# include +# include +# ifdef __cplusplus +extern "C" { +# endif + +typedef struct CMS_EnvelopedData_st CMS_EnvelopedData; +typedef struct CMS_ContentInfo_st CMS_ContentInfo; +typedef struct CMS_SignerInfo_st CMS_SignerInfo; +typedef struct CMS_SignedData_st CMS_SignedData; +typedef struct CMS_CertificateChoices CMS_CertificateChoices; +typedef struct CMS_RevocationInfoChoice_st CMS_RevocationInfoChoice; +typedef struct CMS_RecipientInfo_st CMS_RecipientInfo; +typedef struct CMS_ReceiptRequest_st CMS_ReceiptRequest; +typedef struct CMS_Receipt_st CMS_Receipt; +typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey; +typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute; + +SKM_DEFINE_STACK_OF_INTERNAL(CMS_SignerInfo, CMS_SignerInfo, CMS_SignerInfo) +#define sk_CMS_SignerInfo_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_value(sk, idx) ((CMS_SignerInfo *)OPENSSL_sk_value(ossl_check_const_CMS_SignerInfo_sk_type(sk), (idx))) +#define sk_CMS_SignerInfo_new(cmp) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new(ossl_check_CMS_SignerInfo_compfunc_type(cmp))) +#define sk_CMS_SignerInfo_new_null() ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new_null()) +#define sk_CMS_SignerInfo_new_reserve(cmp, n) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new_reserve(ossl_check_CMS_SignerInfo_compfunc_type(cmp), (n))) +#define sk_CMS_SignerInfo_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_SignerInfo_sk_type(sk), (n)) +#define sk_CMS_SignerInfo_free(sk) OPENSSL_sk_free(ossl_check_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_delete(sk, i) ((CMS_SignerInfo *)OPENSSL_sk_delete(ossl_check_CMS_SignerInfo_sk_type(sk), (i))) +#define sk_CMS_SignerInfo_delete_ptr(sk, ptr) ((CMS_SignerInfo *)OPENSSL_sk_delete_ptr(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr))) +#define sk_CMS_SignerInfo_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define sk_CMS_SignerInfo_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define sk_CMS_SignerInfo_pop(sk) ((CMS_SignerInfo *)OPENSSL_sk_pop(ossl_check_CMS_SignerInfo_sk_type(sk))) +#define sk_CMS_SignerInfo_shift(sk) ((CMS_SignerInfo *)OPENSSL_sk_shift(ossl_check_CMS_SignerInfo_sk_type(sk))) +#define sk_CMS_SignerInfo_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_SignerInfo_sk_type(sk),ossl_check_CMS_SignerInfo_freefunc_type(freefunc)) +#define sk_CMS_SignerInfo_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr), (idx)) +#define sk_CMS_SignerInfo_set(sk, idx, ptr) ((CMS_SignerInfo *)OPENSSL_sk_set(ossl_check_CMS_SignerInfo_sk_type(sk), (idx), ossl_check_CMS_SignerInfo_type(ptr))) +#define sk_CMS_SignerInfo_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define sk_CMS_SignerInfo_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define 
sk_CMS_SignerInfo_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr), pnum) +#define sk_CMS_SignerInfo_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_dup(sk) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_dup(ossl_check_const_CMS_SignerInfo_sk_type(sk))) +#define sk_CMS_SignerInfo_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_copyfunc_type(copyfunc), ossl_check_CMS_SignerInfo_freefunc_type(freefunc))) +#define sk_CMS_SignerInfo_set_cmp_func(sk, cmp) ((sk_CMS_SignerInfo_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey) +#define sk_CMS_RecipientEncryptedKey_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_value(sk, idx) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_value(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk), (idx))) +#define sk_CMS_RecipientEncryptedKey_new(cmp) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new(ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp))) +#define sk_CMS_RecipientEncryptedKey_new_null() ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new_null()) +#define sk_CMS_RecipientEncryptedKey_new_reserve(cmp, n) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp), (n))) +#define sk_CMS_RecipientEncryptedKey_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (n)) +#define sk_CMS_RecipientEncryptedKey_free(sk) OPENSSL_sk_free(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_delete(sk, i) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_delete(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (i))) +#define sk_CMS_RecipientEncryptedKey_delete_ptr(sk, ptr) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr))) +#define sk_CMS_RecipientEncryptedKey_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_pop(sk) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_pop(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_shift(sk) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_shift(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk),ossl_check_CMS_RecipientEncryptedKey_freefunc_type(freefunc)) +#define sk_CMS_RecipientEncryptedKey_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr), (idx)) +#define sk_CMS_RecipientEncryptedKey_set(sk, idx, ptr) 
((CMS_RecipientEncryptedKey *)OPENSSL_sk_set(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (idx), ossl_check_CMS_RecipientEncryptedKey_type(ptr))) +#define sk_CMS_RecipientEncryptedKey_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr), pnum) +#define sk_CMS_RecipientEncryptedKey_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_dup(sk) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_dup(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_copyfunc_type(copyfunc), ossl_check_CMS_RecipientEncryptedKey_freefunc_type(freefunc))) +#define sk_CMS_RecipientEncryptedKey_set_cmp_func(sk, cmp) ((sk_CMS_RecipientEncryptedKey_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RecipientInfo, CMS_RecipientInfo, CMS_RecipientInfo) +#define sk_CMS_RecipientInfo_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_value(sk, idx) ((CMS_RecipientInfo *)OPENSSL_sk_value(ossl_check_const_CMS_RecipientInfo_sk_type(sk), (idx))) +#define sk_CMS_RecipientInfo_new(cmp) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new(ossl_check_CMS_RecipientInfo_compfunc_type(cmp))) +#define sk_CMS_RecipientInfo_new_null() ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new_null()) +#define sk_CMS_RecipientInfo_new_reserve(cmp, n) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RecipientInfo_compfunc_type(cmp), (n))) +#define sk_CMS_RecipientInfo_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RecipientInfo_sk_type(sk), (n)) +#define sk_CMS_RecipientInfo_free(sk) OPENSSL_sk_free(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_delete(sk, i) ((CMS_RecipientInfo *)OPENSSL_sk_delete(ossl_check_CMS_RecipientInfo_sk_type(sk), (i))) +#define sk_CMS_RecipientInfo_delete_ptr(sk, ptr) ((CMS_RecipientInfo *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr))) +#define sk_CMS_RecipientInfo_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_pop(sk) ((CMS_RecipientInfo *)OPENSSL_sk_pop(ossl_check_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_shift(sk) ((CMS_RecipientInfo *)OPENSSL_sk_shift(ossl_check_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_CMS_RecipientInfo_sk_type(sk),ossl_check_CMS_RecipientInfo_freefunc_type(freefunc)) +#define sk_CMS_RecipientInfo_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr), (idx)) +#define sk_CMS_RecipientInfo_set(sk, idx, ptr) ((CMS_RecipientInfo *)OPENSSL_sk_set(ossl_check_CMS_RecipientInfo_sk_type(sk), (idx), ossl_check_CMS_RecipientInfo_type(ptr))) +#define sk_CMS_RecipientInfo_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr), pnum) +#define sk_CMS_RecipientInfo_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_dup(sk) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_dup(ossl_check_const_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_copyfunc_type(copyfunc), ossl_check_CMS_RecipientInfo_freefunc_type(freefunc))) +#define sk_CMS_RecipientInfo_set_cmp_func(sk, cmp) ((sk_CMS_RecipientInfo_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RevocationInfoChoice, CMS_RevocationInfoChoice, CMS_RevocationInfoChoice) +#define sk_CMS_RevocationInfoChoice_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_value(sk, idx) ((CMS_RevocationInfoChoice *)OPENSSL_sk_value(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk), (idx))) +#define sk_CMS_RevocationInfoChoice_new(cmp) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new(ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp))) +#define sk_CMS_RevocationInfoChoice_new_null() ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new_null()) +#define sk_CMS_RevocationInfoChoice_new_reserve(cmp, n) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp), (n))) +#define sk_CMS_RevocationInfoChoice_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (n)) +#define sk_CMS_RevocationInfoChoice_free(sk) OPENSSL_sk_free(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_delete(sk, i) ((CMS_RevocationInfoChoice *)OPENSSL_sk_delete(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (i))) +#define sk_CMS_RevocationInfoChoice_delete_ptr(sk, ptr) ((CMS_RevocationInfoChoice *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr))) +#define sk_CMS_RevocationInfoChoice_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), 
ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_pop(sk) ((CMS_RevocationInfoChoice *)OPENSSL_sk_pop(ossl_check_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_shift(sk) ((CMS_RevocationInfoChoice *)OPENSSL_sk_shift(ossl_check_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_RevocationInfoChoice_sk_type(sk),ossl_check_CMS_RevocationInfoChoice_freefunc_type(freefunc)) +#define sk_CMS_RevocationInfoChoice_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr), (idx)) +#define sk_CMS_RevocationInfoChoice_set(sk, idx, ptr) ((CMS_RevocationInfoChoice *)OPENSSL_sk_set(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (idx), ossl_check_CMS_RevocationInfoChoice_type(ptr))) +#define sk_CMS_RevocationInfoChoice_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr), pnum) +#define sk_CMS_RevocationInfoChoice_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_dup(sk) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_dup(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_copyfunc_type(copyfunc), ossl_check_CMS_RevocationInfoChoice_freefunc_type(freefunc))) +#define sk_CMS_RevocationInfoChoice_set_cmp_func(sk, cmp) ((sk_CMS_RevocationInfoChoice_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp))) + + +DECLARE_ASN1_ITEM(CMS_EnvelopedData) +DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_SignedData) +DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) +DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest) +DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) + +CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +# define CMS_SIGNERINFO_ISSUER_SERIAL 0 +# define CMS_SIGNERINFO_KEYIDENTIFIER 1 + +# define CMS_RECIPINFO_NONE -1 +# define CMS_RECIPINFO_TRANS 0 +# define CMS_RECIPINFO_AGREE 1 +# define CMS_RECIPINFO_KEK 2 +# define CMS_RECIPINFO_PASS 3 +# define CMS_RECIPINFO_OTHER 4 + +/* S/MIME related flags */ + +# define CMS_TEXT 0x1 +# define CMS_NOCERTS 0x2 +# define CMS_NO_CONTENT_VERIFY 0x4 +# define CMS_NO_ATTR_VERIFY 0x8 +# define CMS_NOSIGS \ + (CMS_NO_CONTENT_VERIFY|CMS_NO_ATTR_VERIFY) +# define CMS_NOINTERN 0x10 +# define CMS_NO_SIGNER_CERT_VERIFY 0x20 +# define CMS_NOVERIFY 0x20 +# define CMS_DETACHED 0x40 +# define CMS_BINARY 0x80 +# define CMS_NOATTR 0x100 +# define CMS_NOSMIMECAP 0x200 +# define CMS_NOOLDMIMETYPE 0x400 +# define CMS_CRLFEOL 0x800 +# define CMS_STREAM 0x1000 +# define CMS_NOCRL 0x2000 +# define CMS_PARTIAL 0x4000 +# define CMS_REUSE_DIGEST 0x8000 +# define CMS_USE_KEYID 
0x10000 +# define CMS_DEBUG_DECRYPT 0x20000 +# define CMS_KEY_PARAM 0x40000 +# define CMS_ASCIICRLF 0x80000 +# define CMS_CADES 0x100000 +# define CMS_USE_ORIGINATOR_KEYID 0x200000 + +const ASN1_OBJECT *CMS_get0_type(const CMS_ContentInfo *cms); + +BIO *CMS_dataInit(CMS_ContentInfo *cms, BIO *icont); +int CMS_dataFinal(CMS_ContentInfo *cms, BIO *bio); + +ASN1_OCTET_STRING **CMS_get0_content(CMS_ContentInfo *cms); +int CMS_is_detached(CMS_ContentInfo *cms); +int CMS_set_detached(CMS_ContentInfo *cms, int detached); + +# ifdef OPENSSL_PEM_H +DECLARE_PEM_rw(CMS, CMS_ContentInfo) +# endif +int CMS_stream(unsigned char ***boundary, CMS_ContentInfo *cms); +CMS_ContentInfo *d2i_CMS_bio(BIO *bp, CMS_ContentInfo **cms); +int i2d_CMS_bio(BIO *bp, CMS_ContentInfo *cms); + +BIO *BIO_new_CMS(BIO *out, CMS_ContentInfo *cms); +int i2d_CMS_bio_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags); +int PEM_write_bio_CMS_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, + int flags); +CMS_ContentInfo *SMIME_read_CMS(BIO *bio, BIO **bcont); +CMS_ContentInfo *SMIME_read_CMS_ex(BIO *bio, int flags, BIO **bcont, CMS_ContentInfo **ci); +int SMIME_write_CMS(BIO *bio, CMS_ContentInfo *cms, BIO *data, int flags); + +int CMS_final(CMS_ContentInfo *cms, BIO *data, BIO *dcont, + unsigned int flags); +int CMS_final_digest(CMS_ContentInfo *cms, + const unsigned char *md, unsigned int mdlen, BIO *dcont, + unsigned int flags); + +CMS_ContentInfo *CMS_sign(X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, BIO *data, + unsigned int flags); +CMS_ContentInfo *CMS_sign_ex(X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, BIO *data, + unsigned int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +CMS_ContentInfo *CMS_sign_receipt(CMS_SignerInfo *si, + X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, unsigned int flags); + +int CMS_data(CMS_ContentInfo *cms, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_data_create(BIO *in, unsigned int flags); +CMS_ContentInfo *CMS_data_create_ex(BIO *in, unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_digest_verify(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create(BIO *in, const EVP_MD *md, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create_ex(BIO *in, const EVP_MD *md, + unsigned int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +int CMS_EncryptedData_decrypt(CMS_ContentInfo *cms, + const unsigned char *key, size_t keylen, + BIO *dcont, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_EncryptedData_encrypt(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, + size_t keylen, unsigned int flags); +CMS_ContentInfo *CMS_EncryptedData_encrypt_ex(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, + size_t keylen, unsigned int flags, + OSSL_LIB_CTX *libctx, + const char *propq); + +int CMS_EncryptedData_set1_key(CMS_ContentInfo *cms, const EVP_CIPHER *ciph, + const unsigned char *key, size_t keylen); + +int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + X509_STORE *store, BIO *dcont, BIO *out, unsigned int flags); + +int CMS_verify_receipt(CMS_ContentInfo *rcms, CMS_ContentInfo *ocms, + STACK_OF(X509) *certs, + X509_STORE *store, unsigned int flags); + +STACK_OF(X509) *CMS_get0_signers(CMS_ContentInfo *cms); + +CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags); +CMS_ContentInfo *CMS_encrypt_ex(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags, + 
OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pkey, X509 *cert, + BIO *dcont, BIO *out, unsigned int flags); + +int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert); +int CMS_decrypt_set1_pkey_and_peer(CMS_ContentInfo *cms, EVP_PKEY *pk, + X509 *cert, X509 *peer); +int CMS_decrypt_set1_key(CMS_ContentInfo *cms, + unsigned char *key, size_t keylen, + const unsigned char *id, size_t idlen); +int CMS_decrypt_set1_password(CMS_ContentInfo *cms, + unsigned char *pass, ossl_ssize_t passlen); + +STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms); +int CMS_RecipientInfo_type(CMS_RecipientInfo *ri); +EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri); +CMS_ContentInfo *CMS_AuthEnvelopedData_create(const EVP_CIPHER *cipher); +CMS_ContentInfo * +CMS_AuthEnvelopedData_create_ex(const EVP_CIPHER *cipher, OSSL_LIB_CTX *libctx, + const char *propq); +CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher); +CMS_ContentInfo *CMS_EnvelopedData_create_ex(const EVP_CIPHER *cipher, + OSSL_LIB_CTX *libctx, + const char *propq); +BIO *CMS_EnvelopedData_decrypt(CMS_EnvelopedData *env, BIO *detached_data, + EVP_PKEY *pkey, X509 *cert, + ASN1_OCTET_STRING *secret, unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, + X509 *recip, unsigned int flags); +CMS_RecipientInfo *CMS_add1_recipient(CMS_ContentInfo *cms, X509 *recip, + EVP_PKEY *originatorPrivKey, X509 * originator, unsigned int flags); +int CMS_RecipientInfo_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pkey); +int CMS_RecipientInfo_ktri_cert_cmp(CMS_RecipientInfo *ri, X509 *cert); +int CMS_RecipientInfo_ktri_get0_algs(CMS_RecipientInfo *ri, + EVP_PKEY **pk, X509 **recip, + X509_ALGOR **palg); +int CMS_RecipientInfo_ktri_get0_signer_id(CMS_RecipientInfo *ri, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno); + +CMS_RecipientInfo *CMS_add0_recipient_key(CMS_ContentInfo *cms, int nid, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen, + ASN1_GENERALIZEDTIME *date, + ASN1_OBJECT *otherTypeId, + ASN1_TYPE *otherType); + +int CMS_RecipientInfo_kekri_get0_id(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pid, + ASN1_GENERALIZEDTIME **pdate, + ASN1_OBJECT **potherid, + ASN1_TYPE **pothertype); + +int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, + unsigned char *key, size_t keylen); + +int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri, + const unsigned char *id, size_t idlen); + +int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri, + unsigned char *pass, + ossl_ssize_t passlen); + +CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, + int iter, int wrap_nid, + int pbe_nid, + unsigned char *pass, + ossl_ssize_t passlen, + const EVP_CIPHER *kekciph); + +int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); +int CMS_RecipientInfo_encrypt(const CMS_ContentInfo *cms, CMS_RecipientInfo *ri); + +int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_compress(BIO *in, int comp_nid, unsigned int flags); + +int CMS_set1_eContentType(CMS_ContentInfo *cms, const ASN1_OBJECT *oid); +const ASN1_OBJECT *CMS_get0_eContentType(CMS_ContentInfo *cms); + +CMS_CertificateChoices *CMS_add0_CertificateChoices(CMS_ContentInfo *cms); +int CMS_add0_cert(CMS_ContentInfo *cms, X509 *cert); +int CMS_add1_cert(CMS_ContentInfo 
*cms, X509 *cert); +STACK_OF(X509) *CMS_get1_certs(CMS_ContentInfo *cms); + +CMS_RevocationInfoChoice *CMS_add0_RevocationInfoChoice(CMS_ContentInfo *cms); +int CMS_add0_crl(CMS_ContentInfo *cms, X509_CRL *crl); +int CMS_add1_crl(CMS_ContentInfo *cms, X509_CRL *crl); +STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms); + +int CMS_SignedData_init(CMS_ContentInfo *cms); +CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, + X509 *signer, EVP_PKEY *pk, const EVP_MD *md, + unsigned int flags); +EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si); +EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si); +STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms); + +void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer); +int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert); +int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + unsigned int flags); +void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, + X509 **signer, X509_ALGOR **pdig, + X509_ALGOR **psig); +ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si); +int CMS_SignerInfo_sign(CMS_SignerInfo *si); +int CMS_SignerInfo_verify(CMS_SignerInfo *si); +int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain); +BIO *CMS_SignedData_verify(CMS_SignedData *sd, BIO *detached_data, + STACK_OF(X509) *scerts, X509_STORE *store, + STACK_OF(X509) *extra, STACK_OF(X509_CRL) *crls, + unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_add_smimecap(CMS_SignerInfo *si, STACK_OF(X509_ALGOR) *algs); +int CMS_add_simple_smimecap(STACK_OF(X509_ALGOR) **algs, + int algnid, int keysize); +int CMS_add_standard_smimecap(STACK_OF(X509_ALGOR) **smcap); + +int CMS_signed_get_attr_count(const CMS_SignerInfo *si); +int CMS_signed_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_signed_get_attr_by_OBJ(const CMS_SignerInfo *si, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *CMS_signed_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_signed_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_signed_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_signed_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_signed_get0_data_by_OBJ(const CMS_SignerInfo *si, + const ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_unsigned_get_attr_count(const CMS_SignerInfo *si); +int CMS_unsigned_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_unsigned_get_attr_by_OBJ(const CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int lastpos); +X509_ATTRIBUTE *CMS_unsigned_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_unsigned_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_unsigned_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_unsigned_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int 
type, + const void *bytes, int len); +void *CMS_unsigned_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_get1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest **prr); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0( + unsigned char *id, int idlen, int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0_ex( + unsigned char *id, int idlen, int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo, + OSSL_LIB_CTX *libctx); + +int CMS_add1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest *rr); +void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr, + ASN1_STRING **pcid, + int *pallorfirst, + STACK_OF(GENERAL_NAMES) **plist, + STACK_OF(GENERAL_NAMES) **prto); +int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pukm); +STACK_OF(CMS_RecipientEncryptedKey) +*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri); + +int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri, + X509_ALGOR **pubalg, + ASN1_BIT_STRING **pubkey, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno); + +int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert); + +int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek, + ASN1_OCTET_STRING **keyid, + ASN1_GENERALIZEDTIME **tm, + CMS_OtherKeyAttribute **other, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek, + X509 *cert); +int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk); +int CMS_RecipientInfo_kari_set0_pkey_and_peer(CMS_RecipientInfo *ri, EVP_PKEY *pk, X509 *peer); +EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri); +int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri, + CMS_RecipientEncryptedKey *rek); + +int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg, + ASN1_OCTET_STRING *ukm, int keylen); + +/* Backward compatibility for spelling errors. */ +# define CMS_R_UNKNOWN_DIGEST_ALGORITM CMS_R_UNKNOWN_DIGEST_ALGORITHM +# define CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE \ + CMS_R_UNSUPPORTED_RECIPIENTINFO_TYPE + +# ifdef __cplusplus +} +# endif +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/conf.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/conf.h new file mode 100644 index 00000000000..61bb008770f --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/conf.h @@ -0,0 +1,214 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/conf.h.in + * + * Copyright 1995-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CONF_H +# define OPENSSL_CONF_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CONF_H +# endif + +# include +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + char *section; + char *name; + char *value; +} CONF_VALUE; + +SKM_DEFINE_STACK_OF_INTERNAL(CONF_VALUE, CONF_VALUE, CONF_VALUE) +#define sk_CONF_VALUE_num(sk) OPENSSL_sk_num(ossl_check_const_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_value(sk, idx) ((CONF_VALUE *)OPENSSL_sk_value(ossl_check_const_CONF_VALUE_sk_type(sk), (idx))) +#define sk_CONF_VALUE_new(cmp) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new(ossl_check_CONF_VALUE_compfunc_type(cmp))) +#define sk_CONF_VALUE_new_null() ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new_null()) +#define sk_CONF_VALUE_new_reserve(cmp, n) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new_reserve(ossl_check_CONF_VALUE_compfunc_type(cmp), (n))) +#define sk_CONF_VALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CONF_VALUE_sk_type(sk), (n)) +#define sk_CONF_VALUE_free(sk) OPENSSL_sk_free(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_zero(sk) OPENSSL_sk_zero(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_delete(sk, i) ((CONF_VALUE *)OPENSSL_sk_delete(ossl_check_CONF_VALUE_sk_type(sk), (i))) +#define sk_CONF_VALUE_delete_ptr(sk, ptr) ((CONF_VALUE *)OPENSSL_sk_delete_ptr(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr))) +#define sk_CONF_VALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_pop(sk) ((CONF_VALUE *)OPENSSL_sk_pop(ossl_check_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_shift(sk) ((CONF_VALUE *)OPENSSL_sk_shift(ossl_check_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CONF_VALUE_sk_type(sk),ossl_check_CONF_VALUE_freefunc_type(freefunc)) +#define sk_CONF_VALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr), (idx)) +#define sk_CONF_VALUE_set(sk, idx, ptr) ((CONF_VALUE *)OPENSSL_sk_set(ossl_check_CONF_VALUE_sk_type(sk), (idx), ossl_check_CONF_VALUE_type(ptr))) +#define sk_CONF_VALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr), pnum) +#define sk_CONF_VALUE_sort(sk) OPENSSL_sk_sort(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_dup(sk) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_dup(ossl_check_const_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_copyfunc_type(copyfunc), ossl_check_CONF_VALUE_freefunc_type(freefunc))) +#define sk_CONF_VALUE_set_cmp_func(sk, cmp) 
((sk_CONF_VALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_compfunc_type(cmp))) +DEFINE_LHASH_OF_INTERNAL(CONF_VALUE); +#define lh_CONF_VALUE_new(hfn, cmp) ((LHASH_OF(CONF_VALUE) *)OPENSSL_LH_new(ossl_check_CONF_VALUE_lh_hashfunc_type(hfn), ossl_check_CONF_VALUE_lh_compfunc_type(cmp))) +#define lh_CONF_VALUE_free(lh) OPENSSL_LH_free(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_flush(lh) OPENSSL_LH_flush(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_insert(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_insert(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_delete(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_delete(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_const_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_retrieve(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_retrieve(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_const_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_error(lh) OPENSSL_LH_error(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_num_items(lh) OPENSSL_LH_num_items(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_CONF_VALUE_lh_type(lh), dl) +#define lh_CONF_VALUE_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_CONF_VALUE_lh_doallfunc_type(dfn)) + + +struct conf_st; +struct conf_method_st; +typedef struct conf_method_st CONF_METHOD; + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# include +# endif + +/* Module definitions */ +typedef struct conf_imodule_st CONF_IMODULE; +typedef struct conf_module_st CONF_MODULE; + +STACK_OF(CONF_MODULE); +STACK_OF(CONF_IMODULE); + +/* DSO module function typedefs */ +typedef int conf_init_func (CONF_IMODULE *md, const CONF *cnf); +typedef void conf_finish_func (CONF_IMODULE *md); + +# define CONF_MFLAGS_IGNORE_ERRORS 0x1 +# define CONF_MFLAGS_IGNORE_RETURN_CODES 0x2 +# define CONF_MFLAGS_SILENT 0x4 +# define CONF_MFLAGS_NO_DSO 0x8 +# define CONF_MFLAGS_IGNORE_MISSING_FILE 0x10 +# define CONF_MFLAGS_DEFAULT_SECTION 0x20 + +int CONF_set_default_method(CONF_METHOD *meth); +void CONF_set_nconf(CONF *conf, LHASH_OF(CONF_VALUE) *hash); +LHASH_OF(CONF_VALUE) *CONF_load(LHASH_OF(CONF_VALUE) *conf, const char *file, + long *eline); +# ifndef OPENSSL_NO_STDIO +LHASH_OF(CONF_VALUE) *CONF_load_fp(LHASH_OF(CONF_VALUE) *conf, FILE *fp, + long *eline); +# endif +LHASH_OF(CONF_VALUE) *CONF_load_bio(LHASH_OF(CONF_VALUE) *conf, BIO *bp, + long *eline); +STACK_OF(CONF_VALUE) *CONF_get_section(LHASH_OF(CONF_VALUE) *conf, + const char *section); +char *CONF_get_string(LHASH_OF(CONF_VALUE) *conf, const char *group, + const char *name); +long CONF_get_number(LHASH_OF(CONF_VALUE) *conf, const char *group, + const char *name); +void CONF_free(LHASH_OF(CONF_VALUE) *conf); +#ifndef OPENSSL_NO_STDIO +int CONF_dump_fp(LHASH_OF(CONF_VALUE) *conf, FILE *out); +#endif +int CONF_dump_bio(LHASH_OF(CONF_VALUE) *conf, BIO *out); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void OPENSSL_config(const char 
*config_name); +#endif + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define OPENSSL_no_config() \ + OPENSSL_init_crypto(OPENSSL_INIT_NO_LOAD_CONFIG, NULL) +#endif + +/* + * New conf code. The semantics are different from the functions above. If + * that wasn't the case, the above functions would have been replaced + */ + +CONF *NCONF_new_ex(OSSL_LIB_CTX *libctx, CONF_METHOD *meth); +OSSL_LIB_CTX *NCONF_get0_libctx(const CONF *conf); +CONF *NCONF_new(CONF_METHOD *meth); +CONF_METHOD *NCONF_default(void); +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 CONF_METHOD *NCONF_WIN32(void); +#endif +void NCONF_free(CONF *conf); +void NCONF_free_data(CONF *conf); + +int NCONF_load(CONF *conf, const char *file, long *eline); +# ifndef OPENSSL_NO_STDIO +int NCONF_load_fp(CONF *conf, FILE *fp, long *eline); +# endif +int NCONF_load_bio(CONF *conf, BIO *bp, long *eline); +STACK_OF(OPENSSL_CSTRING) *NCONF_get_section_names(const CONF *conf); +STACK_OF(CONF_VALUE) *NCONF_get_section(const CONF *conf, + const char *section); +char *NCONF_get_string(const CONF *conf, const char *group, const char *name); +int NCONF_get_number_e(const CONF *conf, const char *group, const char *name, + long *result); +#ifndef OPENSSL_NO_STDIO +int NCONF_dump_fp(const CONF *conf, FILE *out); +#endif +int NCONF_dump_bio(const CONF *conf, BIO *out); + +#define NCONF_get_number(c,g,n,r) NCONF_get_number_e(c,g,n,r) + +/* Module functions */ + +int CONF_modules_load(const CONF *cnf, const char *appname, + unsigned long flags); +int CONF_modules_load_file_ex(OSSL_LIB_CTX *libctx, const char *filename, + const char *appname, unsigned long flags); +int CONF_modules_load_file(const char *filename, const char *appname, + unsigned long flags); +void CONF_modules_unload(int all); +void CONF_modules_finish(void); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define CONF_modules_free() while(0) continue +#endif +int CONF_module_add(const char *name, conf_init_func *ifunc, + conf_finish_func *ffunc); + +const char *CONF_imodule_get_name(const CONF_IMODULE *md); +const char *CONF_imodule_get_value(const CONF_IMODULE *md); +void *CONF_imodule_get_usr_data(const CONF_IMODULE *md); +void CONF_imodule_set_usr_data(CONF_IMODULE *md, void *usr_data); +CONF_MODULE *CONF_imodule_get_module(const CONF_IMODULE *md); +unsigned long CONF_imodule_get_flags(const CONF_IMODULE *md); +void CONF_imodule_set_flags(CONF_IMODULE *md, unsigned long flags); +void *CONF_module_get_usr_data(CONF_MODULE *pmod); +void CONF_module_set_usr_data(CONF_MODULE *pmod, void *usr_data); + +char *CONF_get1_default_config_file(void); + +int CONF_parse_list(const char *list, int sep, int nospc, + int (*list_cb) (const char *elem, int len, void *usr), + void *arg); + +void OPENSSL_load_builtin_modules(void); + + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/configuration.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/configuration.h new file mode 100644 index 00000000000..9c7c3922e25 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/configuration.h @@ -0,0 +1,158 @@ +/* + * WARNING: do not edit! + * Generated by configdata.pm from Configurations/common0.tmpl, Configurations/unix-Makefile.tmpl + * via Makefile.in + * + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_CONFIGURATION_H +# define OPENSSL_CONFIGURATION_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +# ifdef OPENSSL_ALGORITHM_DEFINES +# error OPENSSL_ALGORITHM_DEFINES no longer supported +# endif + +/* + * OpenSSL was configured with the following options: + */ + +# define OPENSSL_CONFIGURED_API 30200 +# ifndef OPENSSL_RAND_SEED_OS +# define OPENSSL_RAND_SEED_OS +# endif +# ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +# endif +# ifndef OPENSSL_NO_ASAN +# define OPENSSL_NO_ASAN +# endif +# ifndef OPENSSL_NO_ASM +# define OPENSSL_NO_ASM +# endif +# ifndef OPENSSL_NO_BROTLI +# define OPENSSL_NO_BROTLI +# endif +# ifndef OPENSSL_NO_BROTLI_DYNAMIC +# define OPENSSL_NO_BROTLI_DYNAMIC +# endif +# ifndef OPENSSL_NO_CRYPTO_MDEBUG +# define OPENSSL_NO_CRYPTO_MDEBUG +# endif +# ifndef OPENSSL_NO_CRYPTO_MDEBUG_BACKTRACE +# define OPENSSL_NO_CRYPTO_MDEBUG_BACKTRACE +# endif +# ifndef OPENSSL_NO_DEVCRYPTOENG +# define OPENSSL_NO_DEVCRYPTOENG +# endif +# ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +# endif +# ifndef OPENSSL_NO_EGD +# define OPENSSL_NO_EGD +# endif +# ifndef OPENSSL_NO_EXTERNAL_TESTS +# define OPENSSL_NO_EXTERNAL_TESTS +# endif +# ifndef OPENSSL_NO_FUZZ_AFL +# define OPENSSL_NO_FUZZ_AFL +# endif +# ifndef OPENSSL_NO_FUZZ_LIBFUZZER +# define OPENSSL_NO_FUZZ_LIBFUZZER +# endif +# ifndef OPENSSL_NO_KTLS +# define OPENSSL_NO_KTLS +# endif +# ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +# endif +# ifndef OPENSSL_NO_MSAN +# define OPENSSL_NO_MSAN +# endif +# ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +# endif +# ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +# endif +# ifndef OPENSSL_NO_SSL3 +# define OPENSSL_NO_SSL3 +# endif +# ifndef OPENSSL_NO_SSL3_METHOD +# define OPENSSL_NO_SSL3_METHOD +# endif +# ifndef OPENSSL_NO_TFO +# define OPENSSL_NO_TFO +# endif +# ifndef OPENSSL_NO_TRACE +# define OPENSSL_NO_TRACE +# endif +# ifndef OPENSSL_NO_UBSAN +# define OPENSSL_NO_UBSAN +# endif +# ifndef OPENSSL_NO_UNIT_TEST +# define OPENSSL_NO_UNIT_TEST +# endif +# ifndef OPENSSL_NO_UPLINK +# define OPENSSL_NO_UPLINK +# endif +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS +# define OPENSSL_NO_WEAK_SSL_CIPHERS +# endif +# ifndef OPENSSL_NO_WINSTORE +# define OPENSSL_NO_WINSTORE +# endif +# ifndef OPENSSL_NO_ZLIB +# define OPENSSL_NO_ZLIB +# endif +# ifndef OPENSSL_NO_ZLIB_DYNAMIC +# define OPENSSL_NO_ZLIB_DYNAMIC +# endif +# ifndef OPENSSL_NO_ZSTD +# define OPENSSL_NO_ZSTD +# endif +# ifndef OPENSSL_NO_ZSTD_DYNAMIC +# define OPENSSL_NO_ZSTD_DYNAMIC +# endif +# ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +# endif + + +/* Generate 80386 code? */ +# undef I386_ONLY + +/* + * The following are cipher-specific, but are part of the public API. 
+ */ +# if !defined(OPENSSL_SYS_UEFI) +# undef BN_LLONG +/* Only one for the following should be defined */ +# define SIXTY_FOUR_BIT_LONG +# undef SIXTY_FOUR_BIT +# undef THIRTY_TWO_BIT +# endif + +# define RC4_INT unsigned char + +# if defined(OPENSSL_NO_COMP) || (defined(OPENSSL_NO_BROTLI) && defined(OPENSSL_NO_ZSTD) && defined(OPENSSL_NO_ZLIB)) +# define OPENSSL_NO_COMP_ALG +# else +# undef OPENSSL_NO_COMP_ALG +# endif + +# ifdef __cplusplus +} +# endif + +#endif /* OPENSSL_CONFIGURATION_H */ diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/core_names.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/core_names.h new file mode 100644 index 00000000000..ffffe90f16f --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/core_names.h @@ -0,0 +1,475 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/core_names.h.in + * + * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#ifndef OPENSSL_CORE_NAMES_H +# define OPENSSL_CORE_NAMES_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* OSSL_CIPHER_PARAM_CTS_MODE Values */ +# define OSSL_CIPHER_CTS_MODE_CS1 "CS1" +# define OSSL_CIPHER_CTS_MODE_CS2 "CS2" +# define OSSL_CIPHER_CTS_MODE_CS3 "CS3" + +/* Known CIPHER names (not a complete list) */ +# define OSSL_CIPHER_NAME_AES_128_GCM_SIV "AES-128-GCM-SIV" +# define OSSL_CIPHER_NAME_AES_192_GCM_SIV "AES-192-GCM-SIV" +# define OSSL_CIPHER_NAME_AES_256_GCM_SIV "AES-256-GCM-SIV" + +/* Known DIGEST names (not a complete list) */ +# define OSSL_DIGEST_NAME_MD5 "MD5" +# define OSSL_DIGEST_NAME_MD5_SHA1 "MD5-SHA1" +# define OSSL_DIGEST_NAME_SHA1 "SHA1" +# define OSSL_DIGEST_NAME_SHA2_224 "SHA2-224" +# define OSSL_DIGEST_NAME_SHA2_256 "SHA2-256" +# define OSSL_DIGEST_NAME_SHA2_256_192 "SHA2-256/192" +# define OSSL_DIGEST_NAME_SHA2_384 "SHA2-384" +# define OSSL_DIGEST_NAME_SHA2_512 "SHA2-512" +# define OSSL_DIGEST_NAME_SHA2_512_224 "SHA2-512/224" +# define OSSL_DIGEST_NAME_SHA2_512_256 "SHA2-512/256" +# define OSSL_DIGEST_NAME_MD2 "MD2" +# define OSSL_DIGEST_NAME_MD4 "MD4" +# define OSSL_DIGEST_NAME_MDC2 "MDC2" +# define OSSL_DIGEST_NAME_RIPEMD160 "RIPEMD160" +# define OSSL_DIGEST_NAME_SHA3_224 "SHA3-224" +# define OSSL_DIGEST_NAME_SHA3_256 "SHA3-256" +# define OSSL_DIGEST_NAME_SHA3_384 "SHA3-384" +# define OSSL_DIGEST_NAME_SHA3_512 "SHA3-512" +# define OSSL_DIGEST_NAME_KECCAK_KMAC128 "KECCAK-KMAC-128" +# define OSSL_DIGEST_NAME_KECCAK_KMAC256 "KECCAK-KMAC-256" +# define OSSL_DIGEST_NAME_SM3 "SM3" + +/* Known MAC names */ +# define OSSL_MAC_NAME_BLAKE2BMAC "BLAKE2BMAC" +# define OSSL_MAC_NAME_BLAKE2SMAC "BLAKE2SMAC" +# define OSSL_MAC_NAME_CMAC "CMAC" +# define OSSL_MAC_NAME_GMAC "GMAC" +# define OSSL_MAC_NAME_HMAC "HMAC" +# define OSSL_MAC_NAME_KMAC128 "KMAC128" +# define OSSL_MAC_NAME_KMAC256 "KMAC256" +# define OSSL_MAC_NAME_POLY1305 "POLY1305" +# define OSSL_MAC_NAME_SIPHASH "SIPHASH" + +/* Known KDF names */ +# define OSSL_KDF_NAME_HKDF "HKDF" +# define OSSL_KDF_NAME_TLS1_3_KDF "TLS13-KDF" +# define OSSL_KDF_NAME_PBKDF1 "PBKDF1" +# define OSSL_KDF_NAME_PBKDF2 "PBKDF2" +# define OSSL_KDF_NAME_SCRYPT "SCRYPT" +# define OSSL_KDF_NAME_SSHKDF "SSHKDF" +# define OSSL_KDF_NAME_SSKDF "SSKDF" +# define OSSL_KDF_NAME_TLS1_PRF "TLS1-PRF" 
+# define OSSL_KDF_NAME_X942KDF_ASN1 "X942KDF-ASN1" +# define OSSL_KDF_NAME_X942KDF_CONCAT "X942KDF-CONCAT" +# define OSSL_KDF_NAME_X963KDF "X963KDF" +# define OSSL_KDF_NAME_KBKDF "KBKDF" +# define OSSL_KDF_NAME_KRB5KDF "KRB5KDF" +# define OSSL_KDF_NAME_HMACDRBGKDF "HMAC-DRBG-KDF" + +/* RSA padding modes */ +# define OSSL_PKEY_RSA_PAD_MODE_NONE "none" +# define OSSL_PKEY_RSA_PAD_MODE_PKCSV15 "pkcs1" +# define OSSL_PKEY_RSA_PAD_MODE_OAEP "oaep" +# define OSSL_PKEY_RSA_PAD_MODE_X931 "x931" +# define OSSL_PKEY_RSA_PAD_MODE_PSS "pss" + +/* RSA pss padding salt length */ +# define OSSL_PKEY_RSA_PSS_SALT_LEN_DIGEST "digest" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_MAX "max" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_AUTO "auto" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_AUTO_DIGEST_MAX "auto-digestmax" + +/* OSSL_PKEY_PARAM_EC_ENCODING values */ +# define OSSL_PKEY_EC_ENCODING_EXPLICIT "explicit" +# define OSSL_PKEY_EC_ENCODING_GROUP "named_curve" + +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_UNCOMPRESSED "uncompressed" +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_COMPRESSED "compressed" +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_HYBRID "hybrid" + +# define OSSL_PKEY_EC_GROUP_CHECK_DEFAULT "default" +# define OSSL_PKEY_EC_GROUP_CHECK_NAMED "named" +# define OSSL_PKEY_EC_GROUP_CHECK_NAMED_NIST "named-nist" + +/* OSSL_KEM_PARAM_OPERATION values */ +#define OSSL_KEM_PARAM_OPERATION_RSASVE "RSASVE" +#define OSSL_KEM_PARAM_OPERATION_DHKEM "DHKEM" + +/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ +# define OSSL_ALG_PARAM_CIPHER "cipher" +# define OSSL_ALG_PARAM_DIGEST "digest" +# define OSSL_ALG_PARAM_ENGINE "engine" +# define OSSL_ALG_PARAM_MAC "mac" +# define OSSL_ALG_PARAM_PROPERTIES "properties" +# define OSSL_ASYM_CIPHER_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_ENGINE OSSL_PKEY_PARAM_ENGINE +# define OSSL_ASYM_CIPHER_PARAM_IMPLICIT_REJECTION "implicit-rejection" +# define OSSL_ASYM_CIPHER_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_MGF1_DIGEST_PROPS OSSL_PKEY_PARAM_MGF1_PROPERTIES +# define OSSL_ASYM_CIPHER_PARAM_OAEP_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS "digest-props" +# define OSSL_ASYM_CIPHER_PARAM_OAEP_LABEL "oaep-label" +# define OSSL_ASYM_CIPHER_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE +# define OSSL_ASYM_CIPHER_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION "tls-client-version" +# define OSSL_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION "tls-negotiated-version" +# define OSSL_CAPABILITY_TLS_GROUP_ALG "tls-group-alg" +# define OSSL_CAPABILITY_TLS_GROUP_ID "tls-group-id" +# define OSSL_CAPABILITY_TLS_GROUP_IS_KEM "tls-group-is-kem" +# define OSSL_CAPABILITY_TLS_GROUP_MAX_DTLS "tls-max-dtls" +# define OSSL_CAPABILITY_TLS_GROUP_MAX_TLS "tls-max-tls" +# define OSSL_CAPABILITY_TLS_GROUP_MIN_DTLS "tls-min-dtls" +# define OSSL_CAPABILITY_TLS_GROUP_MIN_TLS "tls-min-tls" +# define OSSL_CAPABILITY_TLS_GROUP_NAME "tls-group-name" +# define OSSL_CAPABILITY_TLS_GROUP_NAME_INTERNAL "tls-group-name-internal" +# define OSSL_CAPABILITY_TLS_GROUP_SECURITY_BITS "tls-group-sec-bits" +# define OSSL_CAPABILITY_TLS_SIGALG_CODE_POINT "tls-sigalg-code-point" +# define OSSL_CAPABILITY_TLS_SIGALG_HASH_NAME "tls-sigalg-hash-name" +# define OSSL_CAPABILITY_TLS_SIGALG_HASH_OID "tls-sigalg-hash-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_IANA_NAME "tls-sigalg-iana-name" +# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE "tls-sigalg-keytype" +# define 
OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE_OID "tls-sigalg-keytype-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_MAX_TLS "tls-max-tls" +# define OSSL_CAPABILITY_TLS_SIGALG_MIN_TLS "tls-min-tls" +# define OSSL_CAPABILITY_TLS_SIGALG_NAME "tls-sigalg-name" +# define OSSL_CAPABILITY_TLS_SIGALG_OID "tls-sigalg-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_SECURITY_BITS "tls-sigalg-sec-bits" +# define OSSL_CAPABILITY_TLS_SIGALG_SIG_NAME "tls-sigalg-sig-name" +# define OSSL_CAPABILITY_TLS_SIGALG_SIG_OID "tls-sigalg-sig-oid" +# define OSSL_CIPHER_PARAM_AEAD "aead" +# define OSSL_CIPHER_PARAM_AEAD_IVLEN OSSL_CIPHER_PARAM_IVLEN +# define OSSL_CIPHER_PARAM_AEAD_MAC_KEY "mackey" +# define OSSL_CIPHER_PARAM_AEAD_TAG "tag" +# define OSSL_CIPHER_PARAM_AEAD_TAGLEN "taglen" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_AAD "tlsaad" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_AAD_PAD "tlsaadpad" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN "tlsivgen" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_IV_FIXED "tlsivfixed" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV "tlsivinv" +# define OSSL_CIPHER_PARAM_ALGORITHM_ID_PARAMS "alg_id_param" +# define OSSL_CIPHER_PARAM_BLOCK_SIZE "blocksize" +# define OSSL_CIPHER_PARAM_CTS "cts" +# define OSSL_CIPHER_PARAM_CTS_MODE "cts_mode" +# define OSSL_CIPHER_PARAM_CUSTOM_IV "custom-iv" +# define OSSL_CIPHER_PARAM_HAS_RAND_KEY "has-randkey" +# define OSSL_CIPHER_PARAM_IV "iv" +# define OSSL_CIPHER_PARAM_IVLEN "ivlen" +# define OSSL_CIPHER_PARAM_KEYLEN "keylen" +# define OSSL_CIPHER_PARAM_MODE "mode" +# define OSSL_CIPHER_PARAM_NUM "num" +# define OSSL_CIPHER_PARAM_PADDING "padding" +# define OSSL_CIPHER_PARAM_RANDOM_KEY "randkey" +# define OSSL_CIPHER_PARAM_RC2_KEYBITS "keybits" +# define OSSL_CIPHER_PARAM_ROUNDS "rounds" +# define OSSL_CIPHER_PARAM_SPEED "speed" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK "tls-multi" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD "tls1multi_aad" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN "tls1multi_aadpacklen" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC "tls1multi_enc" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN "tls1multi_encin" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN "tls1multi_enclen" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE "tls1multi_interleave" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE "tls1multi_maxbufsz" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT "tls1multi_maxsndfrag" +# define OSSL_CIPHER_PARAM_TLS_MAC "tls-mac" +# define OSSL_CIPHER_PARAM_TLS_MAC_SIZE "tls-mac-size" +# define OSSL_CIPHER_PARAM_TLS_VERSION "tls-version" +# define OSSL_CIPHER_PARAM_UPDATED_IV "updated-iv" +# define OSSL_CIPHER_PARAM_USE_BITS "use-bits" +# define OSSL_CIPHER_PARAM_XTS_STANDARD "xts_standard" +# define OSSL_DECODER_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_DIGEST_PARAM_ALGID_ABSENT "algid-absent" +# define OSSL_DIGEST_PARAM_BLOCK_SIZE "blocksize" +# define OSSL_DIGEST_PARAM_MICALG "micalg" +# define OSSL_DIGEST_PARAM_PAD_TYPE "pad-type" +# define OSSL_DIGEST_PARAM_SIZE "size" +# define OSSL_DIGEST_PARAM_SSL3_MS "ssl3-ms" +# define OSSL_DIGEST_PARAM_XOF "xof" +# define OSSL_DIGEST_PARAM_XOFLEN "xoflen" +# define OSSL_DRBG_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_DRBG_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_DRBG_PARAM_ENTROPY_REQUIRED "entropy_required" +# define OSSL_DRBG_PARAM_MAC OSSL_ALG_PARAM_MAC +# define OSSL_DRBG_PARAM_MAX_ADINLEN "max_adinlen" +# define OSSL_DRBG_PARAM_MAX_ENTROPYLEN "max_entropylen" +# define OSSL_DRBG_PARAM_MAX_LENGTH "maxium_length" +# 
define OSSL_DRBG_PARAM_MAX_NONCELEN "max_noncelen" +# define OSSL_DRBG_PARAM_MAX_PERSLEN "max_perslen" +# define OSSL_DRBG_PARAM_MIN_ENTROPYLEN "min_entropylen" +# define OSSL_DRBG_PARAM_MIN_LENGTH "minium_length" +# define OSSL_DRBG_PARAM_MIN_NONCELEN "min_noncelen" +# define OSSL_DRBG_PARAM_PREDICTION_RESISTANCE "prediction_resistance" +# define OSSL_DRBG_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_DRBG_PARAM_RANDOM_DATA "random_data" +# define OSSL_DRBG_PARAM_RESEED_COUNTER "reseed_counter" +# define OSSL_DRBG_PARAM_RESEED_REQUESTS "reseed_requests" +# define OSSL_DRBG_PARAM_RESEED_TIME "reseed_time" +# define OSSL_DRBG_PARAM_RESEED_TIME_INTERVAL "reseed_time_interval" +# define OSSL_DRBG_PARAM_SIZE "size" +# define OSSL_DRBG_PARAM_USE_DF "use_derivation_function" +# define OSSL_ENCODER_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_ENCODER_PARAM_ENCRYPT_LEVEL "encrypt-level" +# define OSSL_ENCODER_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_ENCODER_PARAM_SAVE_PARAMETERS "save-parameters" +# define OSSL_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE "ecdh-cofactor-mode" +# define OSSL_EXCHANGE_PARAM_KDF_DIGEST "kdf-digest" +# define OSSL_EXCHANGE_PARAM_KDF_DIGEST_PROPS "kdf-digest-props" +# define OSSL_EXCHANGE_PARAM_KDF_OUTLEN "kdf-outlen" +# define OSSL_EXCHANGE_PARAM_KDF_TYPE "kdf-type" +# define OSSL_EXCHANGE_PARAM_KDF_UKM "kdf-ukm" +# define OSSL_EXCHANGE_PARAM_PAD "pad" +# define OSSL_GEN_PARAM_ITERATION "iteration" +# define OSSL_GEN_PARAM_POTENTIAL "potential" +# define OSSL_KDF_PARAM_ARGON2_AD "ad" +# define OSSL_KDF_PARAM_ARGON2_LANES "lanes" +# define OSSL_KDF_PARAM_ARGON2_MEMCOST "memcost" +# define OSSL_KDF_PARAM_ARGON2_VERSION "version" +# define OSSL_KDF_PARAM_CEK_ALG "cekalg" +# define OSSL_KDF_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_KDF_PARAM_CONSTANT "constant" +# define OSSL_KDF_PARAM_DATA "data" +# define OSSL_KDF_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_KDF_PARAM_EARLY_CLEAN "early_clean" +# define OSSL_KDF_PARAM_HMACDRBG_ENTROPY "entropy" +# define OSSL_KDF_PARAM_HMACDRBG_NONCE "nonce" +# define OSSL_KDF_PARAM_INFO "info" +# define OSSL_KDF_PARAM_ITER "iter" +# define OSSL_KDF_PARAM_KBKDF_R "r" +# define OSSL_KDF_PARAM_KBKDF_USE_L "use-l" +# define OSSL_KDF_PARAM_KBKDF_USE_SEPARATOR "use-separator" +# define OSSL_KDF_PARAM_KEY "key" +# define OSSL_KDF_PARAM_LABEL "label" +# define OSSL_KDF_PARAM_MAC OSSL_ALG_PARAM_MAC +# define OSSL_KDF_PARAM_MAC_SIZE "maclen" +# define OSSL_KDF_PARAM_MODE "mode" +# define OSSL_KDF_PARAM_PASSWORD "pass" +# define OSSL_KDF_PARAM_PKCS12_ID "id" +# define OSSL_KDF_PARAM_PKCS5 "pkcs5" +# define OSSL_KDF_PARAM_PREFIX "prefix" +# define OSSL_KDF_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_KDF_PARAM_SALT "salt" +# define OSSL_KDF_PARAM_SCRYPT_MAXMEM "maxmem_bytes" +# define OSSL_KDF_PARAM_SCRYPT_N "n" +# define OSSL_KDF_PARAM_SCRYPT_P "p" +# define OSSL_KDF_PARAM_SCRYPT_R "r" +# define OSSL_KDF_PARAM_SECRET "secret" +# define OSSL_KDF_PARAM_SEED "seed" +# define OSSL_KDF_PARAM_SIZE "size" +# define OSSL_KDF_PARAM_SSHKDF_SESSION_ID "session_id" +# define OSSL_KDF_PARAM_SSHKDF_TYPE "type" +# define OSSL_KDF_PARAM_SSHKDF_XCGHASH "xcghash" +# define OSSL_KDF_PARAM_THREADS "threads" +# define OSSL_KDF_PARAM_UKM "ukm" +# define OSSL_KDF_PARAM_X942_ACVPINFO "acvp-info" +# define OSSL_KDF_PARAM_X942_PARTYUINFO "partyu-info" +# define OSSL_KDF_PARAM_X942_PARTYVINFO "partyv-info" +# define OSSL_KDF_PARAM_X942_SUPP_PRIVINFO "supp-privinfo" +# define OSSL_KDF_PARAM_X942_SUPP_PUBINFO "supp-pubinfo" 
+# define OSSL_KDF_PARAM_X942_USE_KEYBITS "use-keybits" +# define OSSL_KEM_PARAM_IKME "ikme" +# define OSSL_KEM_PARAM_OPERATION "operation" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING "block_padding" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA "max_early_data" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN "max_frag_len" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MODE "mode" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_OPTIONS "options" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD "read_ahead" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC "stream_mac" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_TLSTREE "tlstree" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_USE_ETM "use_etm" +# define OSSL_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN "read_buffer_len" +# define OSSL_MAC_PARAM_BLOCK_SIZE "block-size" +# define OSSL_MAC_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_MAC_PARAM_CUSTOM "custom" +# define OSSL_MAC_PARAM_C_ROUNDS "c-rounds" +# define OSSL_MAC_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_MAC_PARAM_DIGEST_NOINIT "digest-noinit" +# define OSSL_MAC_PARAM_DIGEST_ONESHOT "digest-oneshot" +# define OSSL_MAC_PARAM_D_ROUNDS "d-rounds" +# define OSSL_MAC_PARAM_IV "iv" +# define OSSL_MAC_PARAM_KEY "key" +# define OSSL_MAC_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_MAC_PARAM_SALT "salt" +# define OSSL_MAC_PARAM_SIZE "size" +# define OSSL_MAC_PARAM_TLS_DATA_SIZE "tls-data-size" +# define OSSL_MAC_PARAM_XOF "xof" +# define OSSL_OBJECT_PARAM_DATA "data" +# define OSSL_OBJECT_PARAM_DATA_STRUCTURE "data-structure" +# define OSSL_OBJECT_PARAM_DATA_TYPE "data-type" +# define OSSL_OBJECT_PARAM_DESC "desc" +# define OSSL_OBJECT_PARAM_REFERENCE "reference" +# define OSSL_OBJECT_PARAM_TYPE "type" +# define OSSL_PASSPHRASE_PARAM_INFO "info" +# define OSSL_PKEY_PARAM_BITS "bits" +# define OSSL_PKEY_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_PKEY_PARAM_DEFAULT_DIGEST "default-digest" +# define OSSL_PKEY_PARAM_DHKEM_IKM "dhkem-ikm" +# define OSSL_PKEY_PARAM_DH_GENERATOR "safeprime-generator" +# define OSSL_PKEY_PARAM_DH_PRIV_LEN "priv_len" +# define OSSL_PKEY_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_PKEY_PARAM_DIGEST_SIZE "digest-size" +# define OSSL_PKEY_PARAM_DIST_ID "distid" +# define OSSL_PKEY_PARAM_EC_A "a" +# define OSSL_PKEY_PARAM_EC_B "b" +# define OSSL_PKEY_PARAM_EC_CHAR2_M "m" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K1 "k1" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K2 "k2" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K3 "k3" +# define OSSL_PKEY_PARAM_EC_CHAR2_TP_BASIS "tp" +# define OSSL_PKEY_PARAM_EC_CHAR2_TYPE "basis-type" +# define OSSL_PKEY_PARAM_EC_COFACTOR "cofactor" +# define OSSL_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS "decoded-from-explicit" +# define OSSL_PKEY_PARAM_EC_ENCODING "encoding" +# define OSSL_PKEY_PARAM_EC_FIELD_TYPE "field-type" +# define OSSL_PKEY_PARAM_EC_GENERATOR "generator" +# define OSSL_PKEY_PARAM_EC_GROUP_CHECK_TYPE "group-check" +# define OSSL_PKEY_PARAM_EC_INCLUDE_PUBLIC "include-public" +# define OSSL_PKEY_PARAM_EC_ORDER "order" +# define OSSL_PKEY_PARAM_EC_P "p" +# define OSSL_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT "point-format" +# define OSSL_PKEY_PARAM_EC_PUB_X "qx" +# define OSSL_PKEY_PARAM_EC_PUB_Y "qy" +# define OSSL_PKEY_PARAM_EC_SEED "seed" +# define OSSL_PKEY_PARAM_ENCODED_PUBLIC_KEY "encoded-pub-key" +# define OSSL_PKEY_PARAM_ENGINE OSSL_ALG_PARAM_ENGINE +# define OSSL_PKEY_PARAM_FFC_COFACTOR "j" +# define OSSL_PKEY_PARAM_FFC_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_PKEY_PARAM_FFC_DIGEST_PROPS 
OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_FFC_G "g" +# define OSSL_PKEY_PARAM_FFC_GINDEX "gindex" +# define OSSL_PKEY_PARAM_FFC_H "hindex" +# define OSSL_PKEY_PARAM_FFC_P "p" +# define OSSL_PKEY_PARAM_FFC_PBITS "pbits" +# define OSSL_PKEY_PARAM_FFC_PCOUNTER "pcounter" +# define OSSL_PKEY_PARAM_FFC_Q "q" +# define OSSL_PKEY_PARAM_FFC_QBITS "qbits" +# define OSSL_PKEY_PARAM_FFC_SEED "seed" +# define OSSL_PKEY_PARAM_FFC_TYPE "type" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_G "validate-g" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_LEGACY "validate-legacy" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_PQ "validate-pq" +# define OSSL_PKEY_PARAM_GROUP_NAME "group" +# define OSSL_PKEY_PARAM_IMPLICIT_REJECTION "implicit-rejection" +# define OSSL_PKEY_PARAM_MANDATORY_DIGEST "mandatory-digest" +# define OSSL_PKEY_PARAM_MASKGENFUNC "mgf" +# define OSSL_PKEY_PARAM_MAX_SIZE "max-size" +# define OSSL_PKEY_PARAM_MGF1_DIGEST "mgf1-digest" +# define OSSL_PKEY_PARAM_MGF1_PROPERTIES "mgf1-properties" +# define OSSL_PKEY_PARAM_PAD_MODE "pad-mode" +# define OSSL_PKEY_PARAM_PRIV_KEY "priv" +# define OSSL_PKEY_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_PUB_KEY "pub" +# define OSSL_PKEY_PARAM_RSA_BITS OSSL_PKEY_PARAM_BITS +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT "rsa-coefficient" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT1 "rsa-coefficient1" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT2 "rsa-coefficient2" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT3 "rsa-coefficient3" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT4 "rsa-coefficient4" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT5 "rsa-coefficient5" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT6 "rsa-coefficient6" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT7 "rsa-coefficient7" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT8 "rsa-coefficient8" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT9 "rsa-coefficient9" +# define OSSL_PKEY_PARAM_RSA_D "d" +# define OSSL_PKEY_PARAM_RSA_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_PKEY_PARAM_RSA_DIGEST_PROPS OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_RSA_E "e" +# define OSSL_PKEY_PARAM_RSA_EXPONENT "rsa-exponent" +# define OSSL_PKEY_PARAM_RSA_EXPONENT1 "rsa-exponent1" +# define OSSL_PKEY_PARAM_RSA_EXPONENT10 "rsa-exponent10" +# define OSSL_PKEY_PARAM_RSA_EXPONENT2 "rsa-exponent2" +# define OSSL_PKEY_PARAM_RSA_EXPONENT3 "rsa-exponent3" +# define OSSL_PKEY_PARAM_RSA_EXPONENT4 "rsa-exponent4" +# define OSSL_PKEY_PARAM_RSA_EXPONENT5 "rsa-exponent5" +# define OSSL_PKEY_PARAM_RSA_EXPONENT6 "rsa-exponent6" +# define OSSL_PKEY_PARAM_RSA_EXPONENT7 "rsa-exponent7" +# define OSSL_PKEY_PARAM_RSA_EXPONENT8 "rsa-exponent8" +# define OSSL_PKEY_PARAM_RSA_EXPONENT9 "rsa-exponent9" +# define OSSL_PKEY_PARAM_RSA_FACTOR "rsa-factor" +# define OSSL_PKEY_PARAM_RSA_FACTOR1 "rsa-factor1" +# define OSSL_PKEY_PARAM_RSA_FACTOR10 "rsa-factor10" +# define OSSL_PKEY_PARAM_RSA_FACTOR2 "rsa-factor2" +# define OSSL_PKEY_PARAM_RSA_FACTOR3 "rsa-factor3" +# define OSSL_PKEY_PARAM_RSA_FACTOR4 "rsa-factor4" +# define OSSL_PKEY_PARAM_RSA_FACTOR5 "rsa-factor5" +# define OSSL_PKEY_PARAM_RSA_FACTOR6 "rsa-factor6" +# define OSSL_PKEY_PARAM_RSA_FACTOR7 "rsa-factor7" +# define OSSL_PKEY_PARAM_RSA_FACTOR8 "rsa-factor8" +# define OSSL_PKEY_PARAM_RSA_FACTOR9 "rsa-factor9" +# define OSSL_PKEY_PARAM_RSA_MASKGENFUNC OSSL_PKEY_PARAM_MASKGENFUNC +# define OSSL_PKEY_PARAM_RSA_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_PKEY_PARAM_RSA_N "n" +# define OSSL_PKEY_PARAM_RSA_PRIMES "primes" +# define OSSL_PKEY_PARAM_RSA_PSS_SALTLEN "saltlen" +# define 
OSSL_PKEY_PARAM_RSA_TEST_P1 "p1" +# define OSSL_PKEY_PARAM_RSA_TEST_P2 "p2" +# define OSSL_PKEY_PARAM_RSA_TEST_Q1 "q1" +# define OSSL_PKEY_PARAM_RSA_TEST_Q2 "q2" +# define OSSL_PKEY_PARAM_RSA_TEST_XP "xp" +# define OSSL_PKEY_PARAM_RSA_TEST_XP1 "xp1" +# define OSSL_PKEY_PARAM_RSA_TEST_XP2 "xp2" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ "xq" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ1 "xq1" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ2 "xq2" +# define OSSL_PKEY_PARAM_SECURITY_BITS "security-bits" +# define OSSL_PKEY_PARAM_USE_COFACTOR_ECDH OSSL_PKEY_PARAM_USE_COFACTOR_FLAG +# define OSSL_PKEY_PARAM_USE_COFACTOR_FLAG "use-cofactor-flag" +# define OSSL_PROV_PARAM_BUILDINFO "buildinfo" +# define OSSL_PROV_PARAM_CORE_MODULE_FILENAME "module-filename" +# define OSSL_PROV_PARAM_CORE_PROV_NAME "provider-name" +# define OSSL_PROV_PARAM_CORE_VERSION "openssl-version" +# define OSSL_PROV_PARAM_DRBG_TRUNC_DIGEST "drbg-no-trunc-md" +# define OSSL_PROV_PARAM_NAME "name" +# define OSSL_PROV_PARAM_SECURITY_CHECKS "security-checks" +# define OSSL_PROV_PARAM_SELF_TEST_DESC "st-desc" +# define OSSL_PROV_PARAM_SELF_TEST_PHASE "st-phase" +# define OSSL_PROV_PARAM_SELF_TEST_TYPE "st-type" +# define OSSL_PROV_PARAM_STATUS "status" +# define OSSL_PROV_PARAM_TLS1_PRF_EMS_CHECK "tls1-prf-ems-check" +# define OSSL_PROV_PARAM_VERSION "version" +# define OSSL_RAND_PARAM_GENERATE "generate" +# define OSSL_RAND_PARAM_MAX_REQUEST "max_request" +# define OSSL_RAND_PARAM_STATE "state" +# define OSSL_RAND_PARAM_STRENGTH "strength" +# define OSSL_RAND_PARAM_TEST_ENTROPY "test_entropy" +# define OSSL_RAND_PARAM_TEST_NONCE "test_nonce" +# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID "algorithm-id" +# define OSSL_SIGNATURE_PARAM_CONTEXT_STRING "context-string" +# define OSSL_SIGNATURE_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_SIGNATURE_PARAM_DIGEST_SIZE OSSL_PKEY_PARAM_DIGEST_SIZE +# define OSSL_SIGNATURE_PARAM_INSTANCE "instance" +# define OSSL_SIGNATURE_PARAM_KAT "kat" +# define OSSL_SIGNATURE_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_SIGNATURE_PARAM_MGF1_PROPERTIES OSSL_PKEY_PARAM_MGF1_PROPERTIES +# define OSSL_SIGNATURE_PARAM_NONCE_TYPE "nonce-type" +# define OSSL_SIGNATURE_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE +# define OSSL_SIGNATURE_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_SIGNATURE_PARAM_PSS_SALTLEN "saltlen" +# define OSSL_STORE_PARAM_ALIAS "alias" +# define OSSL_STORE_PARAM_DIGEST "digest" +# define OSSL_STORE_PARAM_EXPECT "expect" +# define OSSL_STORE_PARAM_FINGERPRINT "fingerprint" +# define OSSL_STORE_PARAM_INPUT_TYPE "input-type" +# define OSSL_STORE_PARAM_ISSUER "name" +# define OSSL_STORE_PARAM_PROPERTIES "properties" +# define OSSL_STORE_PARAM_SERIAL "serial" +# define OSSL_STORE_PARAM_SUBJECT "subject" + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/crmf.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crmf.h new file mode 100644 index 00000000000..1f901f35f89 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crmf.h @@ -0,0 +1,229 @@ +/*- + * WARNING: do not edit! + * Generated by Makefile from include/openssl/crmf.h.in + * + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright Nokia 2007-2019 + * Copyright Siemens AG 2015-2019 + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * CRMF (RFC 4211) implementation by M. Peylo, M. Viljanen, and D. von Oheimb. + */ + + + +#ifndef OPENSSL_CRMF_H +# define OPENSSL_CRMF_H + +# include + +# ifndef OPENSSL_NO_CRMF +# include +# include +# include +# include /* for GENERAL_NAME etc. */ + +/* explicit #includes not strictly needed since implied by the above: */ +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# define OSSL_CRMF_POPOPRIVKEY_THISMESSAGE 0 +# define OSSL_CRMF_POPOPRIVKEY_SUBSEQUENTMESSAGE 1 +# define OSSL_CRMF_POPOPRIVKEY_DHMAC 2 +# define OSSL_CRMF_POPOPRIVKEY_AGREEMAC 3 +# define OSSL_CRMF_POPOPRIVKEY_ENCRYPTEDKEY 4 + +# define OSSL_CRMF_SUBSEQUENTMESSAGE_ENCRCERT 0 +# define OSSL_CRMF_SUBSEQUENTMESSAGE_CHALLENGERESP 1 +typedef struct ossl_crmf_encryptedvalue_st OSSL_CRMF_ENCRYPTEDVALUE; + +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDVALUE) +typedef struct ossl_crmf_msg_st OSSL_CRMF_MSG; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSG) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_MSG) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CRMF_MSG, OSSL_CRMF_MSG, OSSL_CRMF_MSG) +#define sk_OSSL_CRMF_MSG_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_value(sk, idx) ((OSSL_CRMF_MSG *)OPENSSL_sk_value(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk), (idx))) +#define sk_OSSL_CRMF_MSG_new(cmp) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new(ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp))) +#define sk_OSSL_CRMF_MSG_new_null() ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CRMF_MSG_new_reserve(cmp, n) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp), (n))) +#define sk_OSSL_CRMF_MSG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (n)) +#define sk_OSSL_CRMF_MSG_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_delete(sk, i) ((OSSL_CRMF_MSG *)OPENSSL_sk_delete(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (i))) +#define sk_OSSL_CRMF_MSG_delete_ptr(sk, ptr) ((OSSL_CRMF_MSG *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr))) +#define sk_OSSL_CRMF_MSG_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_pop(sk) ((OSSL_CRMF_MSG *)OPENSSL_sk_pop(ossl_check_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_shift(sk) ((OSSL_CRMF_MSG *)OPENSSL_sk_shift(ossl_check_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CRMF_MSG_sk_type(sk),ossl_check_OSSL_CRMF_MSG_freefunc_type(freefunc)) +#define sk_OSSL_CRMF_MSG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr), (idx)) +#define sk_OSSL_CRMF_MSG_set(sk, idx, ptr) ((OSSL_CRMF_MSG *)OPENSSL_sk_set(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (idx), ossl_check_OSSL_CRMF_MSG_type(ptr))) +#define sk_OSSL_CRMF_MSG_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define 
sk_OSSL_CRMF_MSG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr), pnum) +#define sk_OSSL_CRMF_MSG_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_dup(sk) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_copyfunc_type(copyfunc), ossl_check_OSSL_CRMF_MSG_freefunc_type(freefunc))) +#define sk_OSSL_CRMF_MSG_set_cmp_func(sk, cmp) ((sk_OSSL_CRMF_MSG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp))) + +typedef struct ossl_crmf_attributetypeandvalue_st OSSL_CRMF_ATTRIBUTETYPEANDVALUE; +typedef struct ossl_crmf_pbmparameter_st OSSL_CRMF_PBMPARAMETER; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_PBMPARAMETER) +typedef struct ossl_crmf_poposigningkey_st OSSL_CRMF_POPOSIGNINGKEY; +typedef struct ossl_crmf_certrequest_st OSSL_CRMF_CERTREQUEST; +typedef struct ossl_crmf_certid_st OSSL_CRMF_CERTID; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_CERTID) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_CERTID) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CRMF_CERTID, OSSL_CRMF_CERTID, OSSL_CRMF_CERTID) +#define sk_OSSL_CRMF_CERTID_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_value(sk, idx) ((OSSL_CRMF_CERTID *)OPENSSL_sk_value(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk), (idx))) +#define sk_OSSL_CRMF_CERTID_new(cmp) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new(ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp))) +#define sk_OSSL_CRMF_CERTID_new_null() ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CRMF_CERTID_new_reserve(cmp, n) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp), (n))) +#define sk_OSSL_CRMF_CERTID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (n)) +#define sk_OSSL_CRMF_CERTID_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_delete(sk, i) ((OSSL_CRMF_CERTID *)OPENSSL_sk_delete(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (i))) +#define sk_OSSL_CRMF_CERTID_delete_ptr(sk, ptr) ((OSSL_CRMF_CERTID *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr))) +#define sk_OSSL_CRMF_CERTID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_pop(sk) ((OSSL_CRMF_CERTID *)OPENSSL_sk_pop(ossl_check_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_shift(sk) ((OSSL_CRMF_CERTID *)OPENSSL_sk_shift(ossl_check_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CRMF_CERTID_sk_type(sk),ossl_check_OSSL_CRMF_CERTID_freefunc_type(freefunc)) +#define sk_OSSL_CRMF_CERTID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr), (idx)) +#define sk_OSSL_CRMF_CERTID_set(sk, idx, 
ptr) ((OSSL_CRMF_CERTID *)OPENSSL_sk_set(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (idx), ossl_check_OSSL_CRMF_CERTID_type(ptr))) +#define sk_OSSL_CRMF_CERTID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr), pnum) +#define sk_OSSL_CRMF_CERTID_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_dup(sk) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_copyfunc_type(copyfunc), ossl_check_OSSL_CRMF_CERTID_freefunc_type(freefunc))) +#define sk_OSSL_CRMF_CERTID_set_cmp_func(sk, cmp) ((sk_OSSL_CRMF_CERTID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp))) + + +typedef struct ossl_crmf_pkipublicationinfo_st OSSL_CRMF_PKIPUBLICATIONINFO; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_PKIPUBLICATIONINFO) +typedef struct ossl_crmf_singlepubinfo_st OSSL_CRMF_SINGLEPUBINFO; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_SINGLEPUBINFO) +typedef struct ossl_crmf_certtemplate_st OSSL_CRMF_CERTTEMPLATE; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_CERTTEMPLATE) +typedef STACK_OF(OSSL_CRMF_MSG) OSSL_CRMF_MSGS; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSGS) + +typedef struct ossl_crmf_optionalvalidity_st OSSL_CRMF_OPTIONALVALIDITY; + +/* crmf_pbm.c */ +OSSL_CRMF_PBMPARAMETER *OSSL_CRMF_pbmp_new(OSSL_LIB_CTX *libctx, size_t slen, + int owfnid, size_t itercnt, + int macnid); +int OSSL_CRMF_pbm_new(OSSL_LIB_CTX *libctx, const char *propq, + const OSSL_CRMF_PBMPARAMETER *pbmp, + const unsigned char *msg, size_t msglen, + const unsigned char *sec, size_t seclen, + unsigned char **mac, size_t *maclen); + +/* crmf_lib.c */ +int OSSL_CRMF_MSG_set1_regCtrl_regToken(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING *tok); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regCtrl_regToken(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_authenticator(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING *auth); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regCtrl_authenticator(const OSSL_CRMF_MSG *msg); +int +OSSL_CRMF_MSG_PKIPublicationInfo_push0_SinglePubInfo(OSSL_CRMF_PKIPUBLICATIONINFO *pi, + OSSL_CRMF_SINGLEPUBINFO *spi); +# define OSSL_CRMF_PUB_METHOD_DONTCARE 0 +# define OSSL_CRMF_PUB_METHOD_X500 1 +# define OSSL_CRMF_PUB_METHOD_WEB 2 +# define OSSL_CRMF_PUB_METHOD_LDAP 3 +int OSSL_CRMF_MSG_set0_SinglePubInfo(OSSL_CRMF_SINGLEPUBINFO *spi, + int method, GENERAL_NAME *nm); +# define OSSL_CRMF_PUB_ACTION_DONTPUBLISH 0 +# define OSSL_CRMF_PUB_ACTION_PLEASEPUBLISH 1 +int OSSL_CRMF_MSG_set_PKIPublicationInfo_action(OSSL_CRMF_PKIPUBLICATIONINFO *pi, + int action); +int OSSL_CRMF_MSG_set1_regCtrl_pkiPublicationInfo(OSSL_CRMF_MSG *msg, + const OSSL_CRMF_PKIPUBLICATIONINFO *pi); +OSSL_CRMF_PKIPUBLICATIONINFO +*OSSL_CRMF_MSG_get0_regCtrl_pkiPublicationInfo(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_protocolEncrKey(OSSL_CRMF_MSG *msg, + const X509_PUBKEY *pubkey); 
+X509_PUBKEY +*OSSL_CRMF_MSG_get0_regCtrl_protocolEncrKey(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_oldCertID(OSSL_CRMF_MSG *msg, + const OSSL_CRMF_CERTID *cid); +OSSL_CRMF_CERTID +*OSSL_CRMF_MSG_get0_regCtrl_oldCertID(const OSSL_CRMF_MSG *msg); +OSSL_CRMF_CERTID *OSSL_CRMF_CERTID_gen(const X509_NAME *issuer, + const ASN1_INTEGER *serial); + +int OSSL_CRMF_MSG_set1_regInfo_utf8Pairs(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING *utf8pairs); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regInfo_utf8Pairs(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regInfo_certReq(OSSL_CRMF_MSG *msg, + const OSSL_CRMF_CERTREQUEST *cr); +OSSL_CRMF_CERTREQUEST +*OSSL_CRMF_MSG_get0_regInfo_certReq(const OSSL_CRMF_MSG *msg); + +int OSSL_CRMF_MSG_set0_validity(OSSL_CRMF_MSG *crm, + ASN1_TIME *notBefore, ASN1_TIME *notAfter); +int OSSL_CRMF_MSG_set_certReqId(OSSL_CRMF_MSG *crm, int rid); +int OSSL_CRMF_MSG_get_certReqId(const OSSL_CRMF_MSG *crm); +int OSSL_CRMF_MSG_set0_extensions(OSSL_CRMF_MSG *crm, X509_EXTENSIONS *exts); + +int OSSL_CRMF_MSG_push0_extension(OSSL_CRMF_MSG *crm, X509_EXTENSION *ext); +# define OSSL_CRMF_POPO_NONE -1 +# define OSSL_CRMF_POPO_RAVERIFIED 0 +# define OSSL_CRMF_POPO_SIGNATURE 1 +# define OSSL_CRMF_POPO_KEYENC 2 +# define OSSL_CRMF_POPO_KEYAGREE 3 +int OSSL_CRMF_MSG_create_popo(int meth, OSSL_CRMF_MSG *crm, + EVP_PKEY *pkey, const EVP_MD *digest, + OSSL_LIB_CTX *libctx, const char *propq); +int OSSL_CRMF_MSGS_verify_popo(const OSSL_CRMF_MSGS *reqs, + int rid, int acceptRAVerified, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_CRMF_CERTTEMPLATE *OSSL_CRMF_MSG_get0_tmpl(const OSSL_CRMF_MSG *crm); +X509_PUBKEY +*OSSL_CRMF_CERTTEMPLATE_get0_publicKey(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const X509_NAME +*OSSL_CRMF_CERTTEMPLATE_get0_subject(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const X509_NAME +*OSSL_CRMF_CERTTEMPLATE_get0_issuer(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const ASN1_INTEGER +*OSSL_CRMF_CERTTEMPLATE_get0_serialNumber(const OSSL_CRMF_CERTTEMPLATE *tmpl); +X509_EXTENSIONS +*OSSL_CRMF_CERTTEMPLATE_get0_extensions(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const X509_NAME +*OSSL_CRMF_CERTID_get0_issuer(const OSSL_CRMF_CERTID *cid); +const ASN1_INTEGER +*OSSL_CRMF_CERTID_get0_serialNumber(const OSSL_CRMF_CERTID *cid); +int OSSL_CRMF_CERTTEMPLATE_fill(OSSL_CRMF_CERTTEMPLATE *tmpl, + EVP_PKEY *pubkey, + const X509_NAME *subject, + const X509_NAME *issuer, + const ASN1_INTEGER *serial); +X509 +*OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert, + OSSL_LIB_CTX *libctx, const char *propq, + EVP_PKEY *pkey); + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_CRMF) */ +#endif /* !defined(OPENSSL_CRMF_H) */ diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/crypto.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crypto.h new file mode 100644 index 00000000000..55e00dccdc1 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crypto.h @@ -0,0 +1,561 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/crypto.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CRYPTO_H +# define OPENSSL_CRYPTO_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CRYPTO_H +# endif + +# include +# include + +# include + +# ifndef OPENSSL_NO_STDIO +# include +# endif + +# include +# include +# include +# include +# include +# include + +# ifdef CHARSET_EBCDIC +# include +# endif + +/* + * Resolve problems on some operating systems with symbol names that clash + * one way or another + */ +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSLeay OpenSSL_version_num +# define SSLeay_version OpenSSL_version +# define SSLEAY_VERSION_NUMBER OPENSSL_VERSION_NUMBER +# define SSLEAY_VERSION OPENSSL_VERSION +# define SSLEAY_CFLAGS OPENSSL_CFLAGS +# define SSLEAY_BUILT_ON OPENSSL_BUILT_ON +# define SSLEAY_PLATFORM OPENSSL_PLATFORM +# define SSLEAY_DIR OPENSSL_DIR + +/* + * Old type for allocating dynamic locks. No longer used. Use the new thread + * API instead. + */ +typedef struct { + int dummy; +} CRYPTO_dynlock; + +# endif /* OPENSSL_NO_DEPRECATED_1_1_0 */ + +typedef void CRYPTO_RWLOCK; + +CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void); +__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock); +__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock); +int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock); +void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock); + +int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret, + CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock); + +/* No longer needed, so this is a no-op */ +#define OPENSSL_malloc_init() while(0) continue + +# define OPENSSL_malloc(num) \ + CRYPTO_malloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_zalloc(num) \ + CRYPTO_zalloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_realloc(addr, num) \ + CRYPTO_realloc(addr, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_clear_realloc(addr, old_num, num) \ + CRYPTO_clear_realloc(addr, old_num, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_clear_free(addr, num) \ + CRYPTO_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_free(addr) \ + CRYPTO_free(addr, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_memdup(str, s) \ + CRYPTO_memdup((str), s, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_strdup(str) \ + CRYPTO_strdup(str, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_strndup(str, n) \ + CRYPTO_strndup(str, n, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_malloc(num) \ + CRYPTO_secure_malloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_zalloc(num) \ + CRYPTO_secure_zalloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_free(addr) \ + CRYPTO_secure_free(addr, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_clear_free(addr, num) \ + CRYPTO_secure_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_actual_size(ptr) \ + CRYPTO_secure_actual_size(ptr) + +size_t OPENSSL_strlcpy(char *dst, const char *src, size_t siz); +size_t OPENSSL_strlcat(char *dst, const char *src, size_t siz); +size_t OPENSSL_strnlen(const char *str, size_t maxlen); +int OPENSSL_buf2hexstr_ex(char *str, size_t str_n, size_t *strlength, + 
const unsigned char *buf, size_t buflen, + const char sep); +char *OPENSSL_buf2hexstr(const unsigned char *buf, long buflen); +int OPENSSL_hexstr2buf_ex(unsigned char *buf, size_t buf_n, size_t *buflen, + const char *str, const char sep); +unsigned char *OPENSSL_hexstr2buf(const char *str, long *buflen); +int OPENSSL_hexchar2int(unsigned char c); +int OPENSSL_strcasecmp(const char *s1, const char *s2); +int OPENSSL_strncasecmp(const char *s1, const char *s2, size_t n); + +# define OPENSSL_MALLOC_MAX_NELEMS(type) (((1U<<(sizeof(int)*8-1))-1)/sizeof(type)) + +/* + * These functions return the values of OPENSSL_VERSION_MAJOR, + * OPENSSL_VERSION_MINOR, OPENSSL_VERSION_PATCH, OPENSSL_VERSION_PRE_RELEASE + * and OPENSSL_VERSION_BUILD_METADATA, respectively. + */ +unsigned int OPENSSL_version_major(void); +unsigned int OPENSSL_version_minor(void); +unsigned int OPENSSL_version_patch(void); +const char *OPENSSL_version_pre_release(void); +const char *OPENSSL_version_build_metadata(void); + +unsigned long OpenSSL_version_num(void); +const char *OpenSSL_version(int type); +# define OPENSSL_VERSION 0 +# define OPENSSL_CFLAGS 1 +# define OPENSSL_BUILT_ON 2 +# define OPENSSL_PLATFORM 3 +# define OPENSSL_DIR 4 +# define OPENSSL_ENGINES_DIR 5 +# define OPENSSL_VERSION_STRING 6 +# define OPENSSL_FULL_VERSION_STRING 7 +# define OPENSSL_MODULES_DIR 8 +# define OPENSSL_CPU_INFO 9 + +const char *OPENSSL_info(int type); +/* + * The series starts at 1001 to avoid confusion with the OpenSSL_version + * types. + */ +# define OPENSSL_INFO_CONFIG_DIR 1001 +# define OPENSSL_INFO_ENGINES_DIR 1002 +# define OPENSSL_INFO_MODULES_DIR 1003 +# define OPENSSL_INFO_DSO_EXTENSION 1004 +# define OPENSSL_INFO_DIR_FILENAME_SEPARATOR 1005 +# define OPENSSL_INFO_LIST_SEPARATOR 1006 +# define OPENSSL_INFO_SEED_SOURCE 1007 +# define OPENSSL_INFO_CPU_SETTINGS 1008 + +int OPENSSL_issetugid(void); + +struct crypto_ex_data_st { + OSSL_LIB_CTX *ctx; + STACK_OF(void) *sk; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(void, void, void) +#define sk_void_num(sk) OPENSSL_sk_num(ossl_check_const_void_sk_type(sk)) +#define sk_void_value(sk, idx) ((void *)OPENSSL_sk_value(ossl_check_const_void_sk_type(sk), (idx))) +#define sk_void_new(cmp) ((STACK_OF(void) *)OPENSSL_sk_new(ossl_check_void_compfunc_type(cmp))) +#define sk_void_new_null() ((STACK_OF(void) *)OPENSSL_sk_new_null()) +#define sk_void_new_reserve(cmp, n) ((STACK_OF(void) *)OPENSSL_sk_new_reserve(ossl_check_void_compfunc_type(cmp), (n))) +#define sk_void_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_void_sk_type(sk), (n)) +#define sk_void_free(sk) OPENSSL_sk_free(ossl_check_void_sk_type(sk)) +#define sk_void_zero(sk) OPENSSL_sk_zero(ossl_check_void_sk_type(sk)) +#define sk_void_delete(sk, i) ((void *)OPENSSL_sk_delete(ossl_check_void_sk_type(sk), (i))) +#define sk_void_delete_ptr(sk, ptr) ((void *)OPENSSL_sk_delete_ptr(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr))) +#define sk_void_push(sk, ptr) OPENSSL_sk_push(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_pop(sk) ((void *)OPENSSL_sk_pop(ossl_check_void_sk_type(sk))) +#define sk_void_shift(sk) ((void *)OPENSSL_sk_shift(ossl_check_void_sk_type(sk))) +#define sk_void_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_void_sk_type(sk),ossl_check_void_freefunc_type(freefunc)) +#define sk_void_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr), (idx)) +#define 
sk_void_set(sk, idx, ptr) ((void *)OPENSSL_sk_set(ossl_check_void_sk_type(sk), (idx), ossl_check_void_type(ptr))) +#define sk_void_find(sk, ptr) OPENSSL_sk_find(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr), pnum) +#define sk_void_sort(sk) OPENSSL_sk_sort(ossl_check_void_sk_type(sk)) +#define sk_void_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_void_sk_type(sk)) +#define sk_void_dup(sk) ((STACK_OF(void) *)OPENSSL_sk_dup(ossl_check_const_void_sk_type(sk))) +#define sk_void_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(void) *)OPENSSL_sk_deep_copy(ossl_check_const_void_sk_type(sk), ossl_check_void_copyfunc_type(copyfunc), ossl_check_void_freefunc_type(freefunc))) +#define sk_void_set_cmp_func(sk, cmp) ((sk_void_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_void_sk_type(sk), ossl_check_void_compfunc_type(cmp))) + + + +/* + * Per class, we have a STACK of function pointers. + */ +# define CRYPTO_EX_INDEX_SSL 0 +# define CRYPTO_EX_INDEX_SSL_CTX 1 +# define CRYPTO_EX_INDEX_SSL_SESSION 2 +# define CRYPTO_EX_INDEX_X509 3 +# define CRYPTO_EX_INDEX_X509_STORE 4 +# define CRYPTO_EX_INDEX_X509_STORE_CTX 5 +# define CRYPTO_EX_INDEX_DH 6 +# define CRYPTO_EX_INDEX_DSA 7 +# define CRYPTO_EX_INDEX_EC_KEY 8 +# define CRYPTO_EX_INDEX_RSA 9 +# define CRYPTO_EX_INDEX_ENGINE 10 +# define CRYPTO_EX_INDEX_UI 11 +# define CRYPTO_EX_INDEX_BIO 12 +# define CRYPTO_EX_INDEX_APP 13 +# define CRYPTO_EX_INDEX_UI_METHOD 14 +# define CRYPTO_EX_INDEX_RAND_DRBG 15 +# define CRYPTO_EX_INDEX_DRBG CRYPTO_EX_INDEX_RAND_DRBG +# define CRYPTO_EX_INDEX_OSSL_LIB_CTX 16 +# define CRYPTO_EX_INDEX_EVP_PKEY 17 +# define CRYPTO_EX_INDEX__COUNT 18 + +typedef void CRYPTO_EX_new (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef void CRYPTO_EX_free (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef int CRYPTO_EX_dup (CRYPTO_EX_DATA *to, const CRYPTO_EX_DATA *from, + void **from_d, int idx, long argl, void *argp); +__owur int CRYPTO_get_ex_new_index(int class_index, long argl, void *argp, + CRYPTO_EX_new *new_func, + CRYPTO_EX_dup *dup_func, + CRYPTO_EX_free *free_func); +/* No longer use an index. */ +int CRYPTO_free_ex_index(int class_index, int idx); + +/* + * Initialise/duplicate/free CRYPTO_EX_DATA variables corresponding to a + * given class (invokes whatever per-class callbacks are applicable) + */ +int CRYPTO_new_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad); +int CRYPTO_dup_ex_data(int class_index, CRYPTO_EX_DATA *to, + const CRYPTO_EX_DATA *from); + +void CRYPTO_free_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad); + +/* Allocate a single item in the CRYPTO_EX_DATA variable */ +int CRYPTO_alloc_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad, + int idx); + +/* + * Get/set data in a CRYPTO_EX_DATA variable corresponding to a particular + * index (relative to the class type involved) + */ +int CRYPTO_set_ex_data(CRYPTO_EX_DATA *ad, int idx, void *val); +void *CRYPTO_get_ex_data(const CRYPTO_EX_DATA *ad, int idx); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +/* + * This function cleans up all "ex_data" state. It mustn't be called under + * potential race-conditions. 
+ */ +# define CRYPTO_cleanup_all_ex_data() while(0) continue + +/* + * The old locking functions have been removed completely without compatibility + * macros. This is because the old functions either could not properly report + * errors, or the returned error values were not clearly documented. + * Replacing the locking functions with no-ops would cause race condition + * issues in the affected applications. It is far better for them to fail at + * compile time. + * On the other hand, the locking callbacks are no longer used. Consequently, + * the callback management functions can be safely replaced with no-op macros. + */ +# define CRYPTO_num_locks() (1) +# define CRYPTO_set_locking_callback(func) +# define CRYPTO_get_locking_callback() (NULL) +# define CRYPTO_set_add_lock_callback(func) +# define CRYPTO_get_add_lock_callback() (NULL) + +/* + * These defines where used in combination with the old locking callbacks, + * they are not called anymore, but old code that's not called might still + * use them. + */ +# define CRYPTO_LOCK 1 +# define CRYPTO_UNLOCK 2 +# define CRYPTO_READ 4 +# define CRYPTO_WRITE 8 + +/* This structure is no longer used */ +typedef struct crypto_threadid_st { + int dummy; +} CRYPTO_THREADID; +/* Only use CRYPTO_THREADID_set_[numeric|pointer]() within callbacks */ +# define CRYPTO_THREADID_set_numeric(id, val) +# define CRYPTO_THREADID_set_pointer(id, ptr) +# define CRYPTO_THREADID_set_callback(threadid_func) (0) +# define CRYPTO_THREADID_get_callback() (NULL) +# define CRYPTO_THREADID_current(id) +# define CRYPTO_THREADID_cmp(a, b) (-1) +# define CRYPTO_THREADID_cpy(dest, src) +# define CRYPTO_THREADID_hash(id) (0UL) + +# ifndef OPENSSL_NO_DEPRECATED_1_0_0 +# define CRYPTO_set_id_callback(func) +# define CRYPTO_get_id_callback() (NULL) +# define CRYPTO_thread_id() (0UL) +# endif /* OPENSSL_NO_DEPRECATED_1_0_0 */ + +# define CRYPTO_set_dynlock_create_callback(dyn_create_function) +# define CRYPTO_set_dynlock_lock_callback(dyn_lock_function) +# define CRYPTO_set_dynlock_destroy_callback(dyn_destroy_function) +# define CRYPTO_get_dynlock_create_callback() (NULL) +# define CRYPTO_get_dynlock_lock_callback() (NULL) +# define CRYPTO_get_dynlock_destroy_callback() (NULL) +# endif /* OPENSSL_NO_DEPRECATED_1_1_0 */ + +typedef void *(*CRYPTO_malloc_fn)(size_t num, const char *file, int line); +typedef void *(*CRYPTO_realloc_fn)(void *addr, size_t num, const char *file, + int line); +typedef void (*CRYPTO_free_fn)(void *addr, const char *file, int line); +int CRYPTO_set_mem_functions(CRYPTO_malloc_fn malloc_fn, + CRYPTO_realloc_fn realloc_fn, + CRYPTO_free_fn free_fn); +void CRYPTO_get_mem_functions(CRYPTO_malloc_fn *malloc_fn, + CRYPTO_realloc_fn *realloc_fn, + CRYPTO_free_fn *free_fn); + +OSSL_CRYPTO_ALLOC void *CRYPTO_malloc(size_t num, const char *file, int line); +OSSL_CRYPTO_ALLOC void *CRYPTO_zalloc(size_t num, const char *file, int line); +OSSL_CRYPTO_ALLOC void *CRYPTO_memdup(const void *str, size_t siz, const char *file, int line); +OSSL_CRYPTO_ALLOC char *CRYPTO_strdup(const char *str, const char *file, int line); +OSSL_CRYPTO_ALLOC char *CRYPTO_strndup(const char *str, size_t s, const char *file, int line); +void CRYPTO_free(void *ptr, const char *file, int line); +void CRYPTO_clear_free(void *ptr, size_t num, const char *file, int line); +void *CRYPTO_realloc(void *addr, size_t num, const char *file, int line); +void *CRYPTO_clear_realloc(void *addr, size_t old_num, size_t num, + const char *file, int line); + +int CRYPTO_secure_malloc_init(size_t sz, size_t 
minsize); +int CRYPTO_secure_malloc_done(void); +OSSL_CRYPTO_ALLOC void *CRYPTO_secure_malloc(size_t num, const char *file, int line); +OSSL_CRYPTO_ALLOC void *CRYPTO_secure_zalloc(size_t num, const char *file, int line); +void CRYPTO_secure_free(void *ptr, const char *file, int line); +void CRYPTO_secure_clear_free(void *ptr, size_t num, + const char *file, int line); +int CRYPTO_secure_allocated(const void *ptr); +int CRYPTO_secure_malloc_initialized(void); +size_t CRYPTO_secure_actual_size(void *ptr); +size_t CRYPTO_secure_used(void); + +void OPENSSL_cleanse(void *ptr, size_t len); + +# ifndef OPENSSL_NO_CRYPTO_MDEBUG +/* + * The following can be used to detect memory leaks in the library. If + * used, it turns on malloc checking + */ +# define CRYPTO_MEM_CHECK_OFF 0x0 /* Control only */ +# define CRYPTO_MEM_CHECK_ON 0x1 /* Control and mode bit */ +# define CRYPTO_MEM_CHECK_ENABLE 0x2 /* Control and mode bit */ +# define CRYPTO_MEM_CHECK_DISABLE 0x3 /* Control only */ + +void CRYPTO_get_alloc_counts(int *mcount, int *rcount, int *fcount); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define OPENSSL_mem_debug_push(info) \ + CRYPTO_mem_debug_push(info, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_mem_debug_pop() \ + CRYPTO_mem_debug_pop() +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int CRYPTO_set_mem_debug(int flag); +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_ctrl(int mode); +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_debug_push(const char *info, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_debug_pop(void); +OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_malloc(void *addr, size_t num, + int flag, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_realloc(void *addr1, void *addr2, + size_t num, int flag, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_free(void *addr, int flag, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 +int CRYPTO_mem_leaks_cb(int (*cb)(const char *str, size_t len, void *u), + void *u); +# endif +# ifndef OPENSSL_NO_STDIO +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_leaks_fp(FILE *); +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_leaks(BIO *bio); +# endif +# endif /* OPENSSL_NO_CRYPTO_MDEBUG */ + +/* die if we have to */ +ossl_noreturn void OPENSSL_die(const char *assertion, const char *file, int line); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define OpenSSLDie(f,l,a) OPENSSL_die((a),(f),(l)) +# endif +# define OPENSSL_assert(e) \ + (void)((e) ? 0 : (OPENSSL_die("assertion failed: " #e, OPENSSL_FILE, OPENSSL_LINE), 1)) + +int OPENSSL_isservice(void); + +void OPENSSL_init(void); +# ifdef OPENSSL_SYS_UNIX +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_prepare(void); +OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_parent(void); +OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_child(void); +# endif +# endif + +struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result); +int OPENSSL_gmtime_adj(struct tm *tm, int offset_day, long offset_sec); +int OPENSSL_gmtime_diff(int *pday, int *psec, + const struct tm *from, const struct tm *to); + +/* + * CRYPTO_memcmp returns zero iff the |len| bytes at |a| and |b| are equal. + * It takes an amount of time dependent on |len|, but independent of the + * contents of |a| and |b|. Unlike memcmp, it cannot be used to put elements + * into a defined order as the return value when a != b is undefined, other + * than to be non-zero. 
+ */ +int CRYPTO_memcmp(const void * in_a, const void * in_b, size_t len); + +/* Standard initialisation options */ +# define OPENSSL_INIT_NO_LOAD_CRYPTO_STRINGS 0x00000001L +# define OPENSSL_INIT_LOAD_CRYPTO_STRINGS 0x00000002L +# define OPENSSL_INIT_ADD_ALL_CIPHERS 0x00000004L +# define OPENSSL_INIT_ADD_ALL_DIGESTS 0x00000008L +# define OPENSSL_INIT_NO_ADD_ALL_CIPHERS 0x00000010L +# define OPENSSL_INIT_NO_ADD_ALL_DIGESTS 0x00000020L +# define OPENSSL_INIT_LOAD_CONFIG 0x00000040L +# define OPENSSL_INIT_NO_LOAD_CONFIG 0x00000080L +# define OPENSSL_INIT_ASYNC 0x00000100L +# define OPENSSL_INIT_ENGINE_RDRAND 0x00000200L +# define OPENSSL_INIT_ENGINE_DYNAMIC 0x00000400L +# define OPENSSL_INIT_ENGINE_OPENSSL 0x00000800L +# define OPENSSL_INIT_ENGINE_CRYPTODEV 0x00001000L +# define OPENSSL_INIT_ENGINE_CAPI 0x00002000L +# define OPENSSL_INIT_ENGINE_PADLOCK 0x00004000L +# define OPENSSL_INIT_ENGINE_AFALG 0x00008000L +/* FREE: 0x00010000L */ +# define OPENSSL_INIT_ATFORK 0x00020000L +/* OPENSSL_INIT_BASE_ONLY 0x00040000L */ +# define OPENSSL_INIT_NO_ATEXIT 0x00080000L +/* OPENSSL_INIT flag range 0x03f00000 reserved for OPENSSL_init_ssl() */ +/* FREE: 0x04000000L */ +/* FREE: 0x08000000L */ +/* FREE: 0x10000000L */ +/* FREE: 0x20000000L */ +/* FREE: 0x40000000L */ +/* FREE: 0x80000000L */ +/* Max OPENSSL_INIT flag value is 0x80000000 */ + +/* openssl and dasync not counted as builtin */ +# define OPENSSL_INIT_ENGINE_ALL_BUILTIN \ + (OPENSSL_INIT_ENGINE_RDRAND | OPENSSL_INIT_ENGINE_DYNAMIC \ + | OPENSSL_INIT_ENGINE_CRYPTODEV | OPENSSL_INIT_ENGINE_CAPI | \ + OPENSSL_INIT_ENGINE_PADLOCK) + +/* Library initialisation functions */ +void OPENSSL_cleanup(void); +int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings); +int OPENSSL_atexit(void (*handler)(void)); +void OPENSSL_thread_stop(void); +void OPENSSL_thread_stop_ex(OSSL_LIB_CTX *ctx); + +/* Low-level control of initialization */ +OPENSSL_INIT_SETTINGS *OPENSSL_INIT_new(void); +# ifndef OPENSSL_NO_STDIO +int OPENSSL_INIT_set_config_filename(OPENSSL_INIT_SETTINGS *settings, + const char *config_filename); +void OPENSSL_INIT_set_config_file_flags(OPENSSL_INIT_SETTINGS *settings, + unsigned long flags); +int OPENSSL_INIT_set_config_appname(OPENSSL_INIT_SETTINGS *settings, + const char *config_appname); +# endif +void OPENSSL_INIT_free(OPENSSL_INIT_SETTINGS *settings); + +# if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) +# if defined(_WIN32) +# if defined(BASETYPES) || defined(_WINDEF_H) +/* application has to include in order to use this */ +typedef DWORD CRYPTO_THREAD_LOCAL; +typedef DWORD CRYPTO_THREAD_ID; + +typedef LONG CRYPTO_ONCE; +# define CRYPTO_ONCE_STATIC_INIT 0 +# endif +# else +# if defined(__TANDEM) && defined(_SPT_MODEL_) +# define SPT_THREAD_SIGNAL 1 +# define SPT_THREAD_AWARE 1 +# include +# else +# include +# endif +typedef pthread_once_t CRYPTO_ONCE; +typedef pthread_key_t CRYPTO_THREAD_LOCAL; +typedef pthread_t CRYPTO_THREAD_ID; + +# define CRYPTO_ONCE_STATIC_INIT PTHREAD_ONCE_INIT +# endif +# endif + +# if !defined(CRYPTO_ONCE_STATIC_INIT) +typedef unsigned int CRYPTO_ONCE; +typedef unsigned int CRYPTO_THREAD_LOCAL; +typedef unsigned int CRYPTO_THREAD_ID; +# define CRYPTO_ONCE_STATIC_INIT 0 +# endif + +int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void)); + +int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *)); +void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key); +int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val); +int 
CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key); + +CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void); +int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b); + +OSSL_LIB_CTX *OSSL_LIB_CTX_new(void); +OSSL_LIB_CTX *OSSL_LIB_CTX_new_from_dispatch(const OSSL_CORE_HANDLE *handle, + const OSSL_DISPATCH *in); +OSSL_LIB_CTX *OSSL_LIB_CTX_new_child(const OSSL_CORE_HANDLE *handle, + const OSSL_DISPATCH *in); +int OSSL_LIB_CTX_load_config(OSSL_LIB_CTX *ctx, const char *config_file); +void OSSL_LIB_CTX_free(OSSL_LIB_CTX *); +OSSL_LIB_CTX *OSSL_LIB_CTX_get0_global_default(void); +OSSL_LIB_CTX *OSSL_LIB_CTX_set0_default(OSSL_LIB_CTX *libctx); + +void OSSL_sleep(uint64_t millis); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ct.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ct.h new file mode 100644 index 00000000000..e6dd1192a4e --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ct.h @@ -0,0 +1,573 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ct.h.in + * + * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CT_H +# define OPENSSL_CT_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CT_H +# endif + +# include + +# ifndef OPENSSL_NO_CT +# include +# include +# include +# include +# ifdef __cplusplus +extern "C" { +# endif + + +/* Minimum RSA key size, from RFC6962 */ +# define SCT_MIN_RSA_BITS 2048 + +/* All hashes are SHA256 in v1 of Certificate Transparency */ +# define CT_V1_HASHLEN SHA256_DIGEST_LENGTH + +SKM_DEFINE_STACK_OF_INTERNAL(SCT, SCT, SCT) +#define sk_SCT_num(sk) OPENSSL_sk_num(ossl_check_const_SCT_sk_type(sk)) +#define sk_SCT_value(sk, idx) ((SCT *)OPENSSL_sk_value(ossl_check_const_SCT_sk_type(sk), (idx))) +#define sk_SCT_new(cmp) ((STACK_OF(SCT) *)OPENSSL_sk_new(ossl_check_SCT_compfunc_type(cmp))) +#define sk_SCT_new_null() ((STACK_OF(SCT) *)OPENSSL_sk_new_null()) +#define sk_SCT_new_reserve(cmp, n) ((STACK_OF(SCT) *)OPENSSL_sk_new_reserve(ossl_check_SCT_compfunc_type(cmp), (n))) +#define sk_SCT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SCT_sk_type(sk), (n)) +#define sk_SCT_free(sk) OPENSSL_sk_free(ossl_check_SCT_sk_type(sk)) +#define sk_SCT_zero(sk) OPENSSL_sk_zero(ossl_check_SCT_sk_type(sk)) +#define sk_SCT_delete(sk, i) ((SCT *)OPENSSL_sk_delete(ossl_check_SCT_sk_type(sk), (i))) +#define sk_SCT_delete_ptr(sk, ptr) ((SCT *)OPENSSL_sk_delete_ptr(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr))) +#define sk_SCT_push(sk, ptr) OPENSSL_sk_push(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_pop(sk) ((SCT *)OPENSSL_sk_pop(ossl_check_SCT_sk_type(sk))) +#define sk_SCT_shift(sk) ((SCT *)OPENSSL_sk_shift(ossl_check_SCT_sk_type(sk))) +#define sk_SCT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SCT_sk_type(sk),ossl_check_SCT_freefunc_type(freefunc)) +#define sk_SCT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr), (idx)) +#define sk_SCT_set(sk, idx, ptr) ((SCT *)OPENSSL_sk_set(ossl_check_SCT_sk_type(sk), (idx), 
ossl_check_SCT_type(ptr))) +#define sk_SCT_find(sk, ptr) OPENSSL_sk_find(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr), pnum) +#define sk_SCT_sort(sk) OPENSSL_sk_sort(ossl_check_SCT_sk_type(sk)) +#define sk_SCT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SCT_sk_type(sk)) +#define sk_SCT_dup(sk) ((STACK_OF(SCT) *)OPENSSL_sk_dup(ossl_check_const_SCT_sk_type(sk))) +#define sk_SCT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SCT) *)OPENSSL_sk_deep_copy(ossl_check_const_SCT_sk_type(sk), ossl_check_SCT_copyfunc_type(copyfunc), ossl_check_SCT_freefunc_type(freefunc))) +#define sk_SCT_set_cmp_func(sk, cmp) ((sk_SCT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SCT_sk_type(sk), ossl_check_SCT_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CTLOG, CTLOG, CTLOG) +#define sk_CTLOG_num(sk) OPENSSL_sk_num(ossl_check_const_CTLOG_sk_type(sk)) +#define sk_CTLOG_value(sk, idx) ((CTLOG *)OPENSSL_sk_value(ossl_check_const_CTLOG_sk_type(sk), (idx))) +#define sk_CTLOG_new(cmp) ((STACK_OF(CTLOG) *)OPENSSL_sk_new(ossl_check_CTLOG_compfunc_type(cmp))) +#define sk_CTLOG_new_null() ((STACK_OF(CTLOG) *)OPENSSL_sk_new_null()) +#define sk_CTLOG_new_reserve(cmp, n) ((STACK_OF(CTLOG) *)OPENSSL_sk_new_reserve(ossl_check_CTLOG_compfunc_type(cmp), (n))) +#define sk_CTLOG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CTLOG_sk_type(sk), (n)) +#define sk_CTLOG_free(sk) OPENSSL_sk_free(ossl_check_CTLOG_sk_type(sk)) +#define sk_CTLOG_zero(sk) OPENSSL_sk_zero(ossl_check_CTLOG_sk_type(sk)) +#define sk_CTLOG_delete(sk, i) ((CTLOG *)OPENSSL_sk_delete(ossl_check_CTLOG_sk_type(sk), (i))) +#define sk_CTLOG_delete_ptr(sk, ptr) ((CTLOG *)OPENSSL_sk_delete_ptr(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr))) +#define sk_CTLOG_push(sk, ptr) OPENSSL_sk_push(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_pop(sk) ((CTLOG *)OPENSSL_sk_pop(ossl_check_CTLOG_sk_type(sk))) +#define sk_CTLOG_shift(sk) ((CTLOG *)OPENSSL_sk_shift(ossl_check_CTLOG_sk_type(sk))) +#define sk_CTLOG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CTLOG_sk_type(sk),ossl_check_CTLOG_freefunc_type(freefunc)) +#define sk_CTLOG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr), (idx)) +#define sk_CTLOG_set(sk, idx, ptr) ((CTLOG *)OPENSSL_sk_set(ossl_check_CTLOG_sk_type(sk), (idx), ossl_check_CTLOG_type(ptr))) +#define sk_CTLOG_find(sk, ptr) OPENSSL_sk_find(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr), pnum) +#define sk_CTLOG_sort(sk) OPENSSL_sk_sort(ossl_check_CTLOG_sk_type(sk)) +#define sk_CTLOG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CTLOG_sk_type(sk)) +#define sk_CTLOG_dup(sk) ((STACK_OF(CTLOG) *)OPENSSL_sk_dup(ossl_check_const_CTLOG_sk_type(sk))) +#define sk_CTLOG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CTLOG) *)OPENSSL_sk_deep_copy(ossl_check_const_CTLOG_sk_type(sk), ossl_check_CTLOG_copyfunc_type(copyfunc), ossl_check_CTLOG_freefunc_type(freefunc))) +#define sk_CTLOG_set_cmp_func(sk, 
cmp) ((sk_CTLOG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_compfunc_type(cmp))) + + + +typedef enum { + CT_LOG_ENTRY_TYPE_NOT_SET = -1, + CT_LOG_ENTRY_TYPE_X509 = 0, + CT_LOG_ENTRY_TYPE_PRECERT = 1 +} ct_log_entry_type_t; + +typedef enum { + SCT_VERSION_NOT_SET = -1, + SCT_VERSION_V1 = 0 +} sct_version_t; + +typedef enum { + SCT_SOURCE_UNKNOWN, + SCT_SOURCE_TLS_EXTENSION, + SCT_SOURCE_X509V3_EXTENSION, + SCT_SOURCE_OCSP_STAPLED_RESPONSE +} sct_source_t; + +typedef enum { + SCT_VALIDATION_STATUS_NOT_SET, + SCT_VALIDATION_STATUS_UNKNOWN_LOG, + SCT_VALIDATION_STATUS_VALID, + SCT_VALIDATION_STATUS_INVALID, + SCT_VALIDATION_STATUS_UNVERIFIED, + SCT_VALIDATION_STATUS_UNKNOWN_VERSION +} sct_validation_status_t; + +/****************************************** + * CT policy evaluation context functions * + ******************************************/ + +/* + * Creates a new, empty policy evaluation context associated with the given + * library context and property query string. + * The caller is responsible for calling CT_POLICY_EVAL_CTX_free when finished + * with the CT_POLICY_EVAL_CTX. + */ +CT_POLICY_EVAL_CTX *CT_POLICY_EVAL_CTX_new_ex(OSSL_LIB_CTX *libctx, + const char *propq); + +/* + * The same as CT_POLICY_EVAL_CTX_new_ex() but the default library + * context and property query string is used. + */ +CT_POLICY_EVAL_CTX *CT_POLICY_EVAL_CTX_new(void); + +/* Deletes a policy evaluation context and anything it owns. */ +void CT_POLICY_EVAL_CTX_free(CT_POLICY_EVAL_CTX *ctx); + +/* Gets the peer certificate that the SCTs are for */ +X509* CT_POLICY_EVAL_CTX_get0_cert(const CT_POLICY_EVAL_CTX *ctx); + +/* + * Sets the certificate associated with the received SCTs. + * Increments the reference count of cert. + * Returns 1 on success, 0 otherwise. + */ +int CT_POLICY_EVAL_CTX_set1_cert(CT_POLICY_EVAL_CTX *ctx, X509 *cert); + +/* Gets the issuer of the aforementioned certificate */ +X509* CT_POLICY_EVAL_CTX_get0_issuer(const CT_POLICY_EVAL_CTX *ctx); + +/* + * Sets the issuer of the certificate associated with the received SCTs. + * Increments the reference count of issuer. + * Returns 1 on success, 0 otherwise. + */ +int CT_POLICY_EVAL_CTX_set1_issuer(CT_POLICY_EVAL_CTX *ctx, X509 *issuer); + +/* Gets the CT logs that are trusted sources of SCTs */ +const CTLOG_STORE *CT_POLICY_EVAL_CTX_get0_log_store(const CT_POLICY_EVAL_CTX *ctx); + +/* Sets the log store that is in use. It must outlive the CT_POLICY_EVAL_CTX. */ +void CT_POLICY_EVAL_CTX_set_shared_CTLOG_STORE(CT_POLICY_EVAL_CTX *ctx, + CTLOG_STORE *log_store); + +/* + * Gets the time, in milliseconds since the Unix epoch, that will be used as the + * current time when checking whether an SCT was issued in the future. + * Such SCTs will fail validation, as required by RFC6962. + */ +uint64_t CT_POLICY_EVAL_CTX_get_time(const CT_POLICY_EVAL_CTX *ctx); + +/* + * Sets the time to evaluate SCTs against, in milliseconds since the Unix epoch. + * If an SCT's timestamp is after this time, it will be interpreted as having + * been issued in the future. RFC6962 states that "TLS clients MUST reject SCTs + * whose timestamp is in the future", so an SCT will not validate in this case. + */ +void CT_POLICY_EVAL_CTX_set_time(CT_POLICY_EVAL_CTX *ctx, uint64_t time_in_ms); + +/***************** + * SCT functions * + *****************/ + +/* + * Creates a new, blank SCT. + * The caller is responsible for calling SCT_free when finished with the SCT. 
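+ *
+ * Illustrative sketch (editor's addition, not part of the generated header),
+ * showing the expected allocate/populate/free lifecycle; the timestamp value
+ * is arbitrary and given in milliseconds since the Unix epoch:
+ *
+ *   SCT *sct = SCT_new();
+ *   if (sct != NULL) {
+ *       SCT_set_timestamp(sct, 1234567890000ULL);
+ *       SCT_free(sct);
+ *   }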
+ */ +SCT *SCT_new(void); + +/* + * Creates a new SCT from some base64-encoded strings. + * The caller is responsible for calling SCT_free when finished with the SCT. + */ +SCT *SCT_new_from_base64(unsigned char version, + const char *logid_base64, + ct_log_entry_type_t entry_type, + uint64_t timestamp, + const char *extensions_base64, + const char *signature_base64); + +/* + * Frees the SCT and the underlying data structures. + */ +void SCT_free(SCT *sct); + +/* + * Free a stack of SCTs, and the underlying SCTs themselves. + * Intended to be compatible with X509V3_EXT_FREE. + */ +void SCT_LIST_free(STACK_OF(SCT) *a); + +/* + * Returns the version of the SCT. + */ +sct_version_t SCT_get_version(const SCT *sct); + +/* + * Set the version of an SCT. + * Returns 1 on success, 0 if the version is unrecognized. + */ +__owur int SCT_set_version(SCT *sct, sct_version_t version); + +/* + * Returns the log entry type of the SCT. + */ +ct_log_entry_type_t SCT_get_log_entry_type(const SCT *sct); + +/* + * Set the log entry type of an SCT. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set_log_entry_type(SCT *sct, ct_log_entry_type_t entry_type); + +/* + * Gets the ID of the log that an SCT came from. + * Ownership of the log ID remains with the SCT. + * Returns the length of the log ID. + */ +size_t SCT_get0_log_id(const SCT *sct, unsigned char **log_id); + +/* + * Set the log ID of an SCT to point directly to the *log_id specified. + * The SCT takes ownership of the specified pointer. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set0_log_id(SCT *sct, unsigned char *log_id, size_t log_id_len); + +/* + * Set the log ID of an SCT. + * This makes a copy of the log_id. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set1_log_id(SCT *sct, const unsigned char *log_id, + size_t log_id_len); + +/* + * Returns the timestamp for the SCT (epoch time in milliseconds). + */ +uint64_t SCT_get_timestamp(const SCT *sct); + +/* + * Set the timestamp of an SCT (epoch time in milliseconds). + */ +void SCT_set_timestamp(SCT *sct, uint64_t timestamp); + +/* + * Return the NID for the signature used by the SCT. + * For CT v1, this will be either NID_sha256WithRSAEncryption or + * NID_ecdsa_with_SHA256 (or NID_undef if incorrect/unset). + */ +int SCT_get_signature_nid(const SCT *sct); + +/* + * Set the signature type of an SCT + * For CT v1, this should be either NID_sha256WithRSAEncryption or + * NID_ecdsa_with_SHA256. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set_signature_nid(SCT *sct, int nid); + +/* + * Set *ext to point to the extension data for the SCT. ext must not be NULL. + * The SCT retains ownership of this pointer. + * Returns length of the data pointed to. + */ +size_t SCT_get0_extensions(const SCT *sct, unsigned char **ext); + +/* + * Set the extensions of an SCT to point directly to the *ext specified. + * The SCT takes ownership of the specified pointer. + */ +void SCT_set0_extensions(SCT *sct, unsigned char *ext, size_t ext_len); + +/* + * Set the extensions of an SCT. + * This takes a copy of the ext. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set1_extensions(SCT *sct, const unsigned char *ext, + size_t ext_len); + +/* + * Set *sig to point to the signature for the SCT. sig must not be NULL. + * The SCT retains ownership of this pointer. + * Returns length of the data pointed to. + */ +size_t SCT_get0_signature(const SCT *sct, unsigned char **sig); + +/* + * Set the signature of an SCT to point directly to the *sig specified. 
+ * The SCT takes ownership of the specified pointer. + */ +void SCT_set0_signature(SCT *sct, unsigned char *sig, size_t sig_len); + +/* + * Set the signature of an SCT to be a copy of the *sig specified. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set1_signature(SCT *sct, const unsigned char *sig, + size_t sig_len); + +/* + * The origin of this SCT, e.g. TLS extension, OCSP response, etc. + */ +sct_source_t SCT_get_source(const SCT *sct); + +/* + * Set the origin of this SCT, e.g. TLS extension, OCSP response, etc. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set_source(SCT *sct, sct_source_t source); + +/* + * Returns a text string describing the validation status of |sct|. + */ +const char *SCT_validation_status_string(const SCT *sct); + +/* + * Pretty-prints an |sct| to |out|. + * It will be indented by the number of spaces specified by |indent|. + * If |logs| is not NULL, it will be used to lookup the CT log that the SCT came + * from, so that the log name can be printed. + */ +void SCT_print(const SCT *sct, BIO *out, int indent, const CTLOG_STORE *logs); + +/* + * Pretty-prints an |sct_list| to |out|. + * It will be indented by the number of spaces specified by |indent|. + * SCTs will be delimited by |separator|. + * If |logs| is not NULL, it will be used to lookup the CT log that each SCT + * came from, so that the log names can be printed. + */ +void SCT_LIST_print(const STACK_OF(SCT) *sct_list, BIO *out, int indent, + const char *separator, const CTLOG_STORE *logs); + +/* + * Gets the last result of validating this SCT. + * If it has not been validated yet, returns SCT_VALIDATION_STATUS_NOT_SET. + */ +sct_validation_status_t SCT_get_validation_status(const SCT *sct); + +/* + * Validates the given SCT with the provided context. + * Sets the "validation_status" field of the SCT. + * Returns 1 if the SCT is valid and the signature verifies. + * Returns 0 if the SCT is invalid or could not be verified. + * Returns -1 if an error occurs. + */ +__owur int SCT_validate(SCT *sct, const CT_POLICY_EVAL_CTX *ctx); + +/* + * Validates the given list of SCTs with the provided context. + * Sets the "validation_status" field of each SCT. + * Returns 1 if there are no invalid SCTs and all signatures verify. + * Returns 0 if at least one SCT is invalid or could not be verified. + * Returns a negative integer if an error occurs. + */ +__owur int SCT_LIST_validate(const STACK_OF(SCT) *scts, + CT_POLICY_EVAL_CTX *ctx); + + +/********************************* + * SCT parsing and serialization * + *********************************/ + +/* + * Serialize (to TLS format) a stack of SCTs and return the length. + * "a" must not be NULL. + * If "pp" is NULL, just return the length of what would have been serialized. + * If "pp" is not NULL and "*pp" is null, function will allocate a new pointer + * for data that caller is responsible for freeing (only if function returns + * successfully). + * If "pp" is NULL and "*pp" is not NULL, caller is responsible for ensuring + * that "*pp" is large enough to accept all of the serialized data. + * Returns < 0 on error, >= 0 indicating bytes written (or would have been) + * on success. + */ +__owur int i2o_SCT_LIST(const STACK_OF(SCT) *a, unsigned char **pp); + +/* + * Convert TLS format SCT list to a stack of SCTs. + * If "a" or "*a" is NULL, a new stack will be created that the caller is + * responsible for freeing (by calling SCT_LIST_free). + * "**pp" and "*pp" must not be NULL. 
+ * Upon success, "*pp" will point to after the last bytes read, and a stack + * will be returned. + * Upon failure, a NULL pointer will be returned, and the position of "*pp" is + * not defined. + */ +STACK_OF(SCT) *o2i_SCT_LIST(STACK_OF(SCT) **a, const unsigned char **pp, + size_t len); + +/* + * Serialize (to DER format) a stack of SCTs and return the length. + * "a" must not be NULL. + * If "pp" is NULL, just returns the length of what would have been serialized. + * If "pp" is not NULL and "*pp" is null, function will allocate a new pointer + * for data that caller is responsible for freeing (only if function returns + * successfully). + * If "pp" is NULL and "*pp" is not NULL, caller is responsible for ensuring + * that "*pp" is large enough to accept all of the serialized data. + * Returns < 0 on error, >= 0 indicating bytes written (or would have been) + * on success. + */ +__owur int i2d_SCT_LIST(const STACK_OF(SCT) *a, unsigned char **pp); + +/* + * Parses an SCT list in DER format and returns it. + * If "a" or "*a" is NULL, a new stack will be created that the caller is + * responsible for freeing (by calling SCT_LIST_free). + * "**pp" and "*pp" must not be NULL. + * Upon success, "*pp" will point to after the last bytes read, and a stack + * will be returned. + * Upon failure, a NULL pointer will be returned, and the position of "*pp" is + * not defined. + */ +STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a, const unsigned char **pp, + long len); + +/* + * Serialize (to TLS format) an |sct| and write it to |out|. + * If |out| is null, no SCT will be output but the length will still be returned. + * If |out| points to a null pointer, a string will be allocated to hold the + * TLS-format SCT. It is the responsibility of the caller to free it. + * If |out| points to an allocated string, the TLS-format SCT will be written + * to it. + * The length of the SCT in TLS format will be returned. + */ +__owur int i2o_SCT(const SCT *sct, unsigned char **out); + +/* + * Parses an SCT in TLS format and returns it. + * If |psct| is not null, it will end up pointing to the parsed SCT. If it + * already points to a non-null pointer, the pointer will be free'd. + * |in| should be a pointer to a string containing the TLS-format SCT. + * |in| will be advanced to the end of the SCT if parsing succeeds. + * |len| should be the length of the SCT in |in|. + * Returns NULL if an error occurs. + * If the SCT is an unsupported version, only the SCT's 'sct' and 'sct_len' + * fields will be populated (with |in| and |len| respectively). + */ +SCT *o2i_SCT(SCT **psct, const unsigned char **in, size_t len); + +/******************** + * CT log functions * + ********************/ + +/* + * Creates a new CT log instance with the given |public_key| and |name| and + * associates it with the give library context |libctx| and property query + * string |propq|. + * Takes ownership of |public_key| but copies |name|. + * Returns NULL if malloc fails or if |public_key| cannot be converted to DER. + * Should be deleted by the caller using CTLOG_free when no longer needed. + */ +CTLOG *CTLOG_new_ex(EVP_PKEY *public_key, const char *name, OSSL_LIB_CTX *libctx, + const char *propq); + +/* + * The same as CTLOG_new_ex except that the default library context and + * property query string are used. 
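+ *
+ * Illustrative sketch (editor's addition, not part of the generated header):
+ * assuming `pubkey` is an EVP_PKEY already holding the log's public key, a
+ * log instance is created against the default library context and released
+ * with (CTLOG_new takes ownership of `pubkey`, so freeing the log frees it):
+ *
+ *   CTLOG *log = CTLOG_new(pubkey, "example log");
+ *   if (log != NULL)
+ *       CTLOG_free(log);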
+ */ +CTLOG *CTLOG_new(EVP_PKEY *public_key, const char *name); + +/* + * Creates a new CTLOG instance with the base64-encoded SubjectPublicKeyInfo DER + * in |pkey_base64| and associated with the given library context |libctx| and + * property query string |propq|. The |name| is a string to help users identify + * this log. + * Returns 1 on success, 0 on failure. + * Should be deleted by the caller using CTLOG_free when no longer needed. + */ +int CTLOG_new_from_base64_ex(CTLOG **ct_log, const char *pkey_base64, + const char *name, OSSL_LIB_CTX *libctx, + const char *propq); + +/* + * The same as CTLOG_new_from_base64_ex() except that the default + * library context and property query string are used. + * Returns 1 on success, 0 on failure. + */ +int CTLOG_new_from_base64(CTLOG ** ct_log, + const char *pkey_base64, const char *name); + +/* + * Deletes a CT log instance and its fields. + */ +void CTLOG_free(CTLOG *log); + +/* Gets the name of the CT log */ +const char *CTLOG_get0_name(const CTLOG *log); +/* Gets the ID of the CT log */ +void CTLOG_get0_log_id(const CTLOG *log, const uint8_t **log_id, + size_t *log_id_len); +/* Gets the public key of the CT log */ +EVP_PKEY *CTLOG_get0_public_key(const CTLOG *log); + +/************************** + * CT log store functions * + **************************/ + +/* + * Creates a new CT log store and associates it with the given libctx and + * property query string. + * Should be deleted by the caller using CTLOG_STORE_free when no longer needed. + */ +CTLOG_STORE *CTLOG_STORE_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +/* + * Same as CTLOG_STORE_new_ex except that the default libctx and + * property query string are used. + * Should be deleted by the caller using CTLOG_STORE_free when no longer needed. + */ +CTLOG_STORE *CTLOG_STORE_new(void); + +/* + * Deletes a CT log store and all of the CT log instances held within. + */ +void CTLOG_STORE_free(CTLOG_STORE *store); + +/* + * Finds a CT log in the store based on its log ID. + * Returns the CT log, or NULL if no match is found. + */ +const CTLOG *CTLOG_STORE_get0_log_by_id(const CTLOG_STORE *store, + const uint8_t *log_id, + size_t log_id_len); + +/* + * Loads a CT log list into a |store| from a |file|. + * Returns 1 if loading is successful, or 0 otherwise. + */ +__owur int CTLOG_STORE_load_file(CTLOG_STORE *store, const char *file); + +/* + * Loads the default CT log list into a |store|. + * Returns 1 if loading is successful, or 0 otherwise. + */ +__owur int CTLOG_STORE_load_default_file(CTLOG_STORE *store); + +# ifdef __cplusplus +} +# endif +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/err.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/err.h new file mode 100644 index 00000000000..b987e31f8c8 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/err.h @@ -0,0 +1,511 @@ +/* + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ERR_H +# define OPENSSL_ERR_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_ERR_H +# endif + +# include + +# ifndef OPENSSL_NO_STDIO +# include +# include +# endif + +# include +# include +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_FILENAMES +# define ERR_PUT_error(l,f,r,fn,ln) ERR_put_error(l,f,r,fn,ln) +# else +# define ERR_PUT_error(l,f,r,fn,ln) ERR_put_error(l,f,r,NULL,0) +# endif +# endif + +# include +# include + +# define ERR_TXT_MALLOCED 0x01 +# define ERR_TXT_STRING 0x02 + +# if !defined(OPENSSL_NO_DEPRECATED_3_0) || defined(OSSL_FORCE_ERR_STATE) +# define ERR_FLAG_MARK 0x01 +# define ERR_FLAG_CLEAR 0x02 + +# define ERR_NUM_ERRORS 16 +struct err_state_st { + int err_flags[ERR_NUM_ERRORS]; + int err_marks[ERR_NUM_ERRORS]; + unsigned long err_buffer[ERR_NUM_ERRORS]; + char *err_data[ERR_NUM_ERRORS]; + size_t err_data_size[ERR_NUM_ERRORS]; + int err_data_flags[ERR_NUM_ERRORS]; + char *err_file[ERR_NUM_ERRORS]; + int err_line[ERR_NUM_ERRORS]; + char *err_func[ERR_NUM_ERRORS]; + int top, bottom; +}; +# endif + +/* library */ +# define ERR_LIB_NONE 1 +# define ERR_LIB_SYS 2 +# define ERR_LIB_BN 3 +# define ERR_LIB_RSA 4 +# define ERR_LIB_DH 5 +# define ERR_LIB_EVP 6 +# define ERR_LIB_BUF 7 +# define ERR_LIB_OBJ 8 +# define ERR_LIB_PEM 9 +# define ERR_LIB_DSA 10 +# define ERR_LIB_X509 11 +/* #define ERR_LIB_METH 12 */ +# define ERR_LIB_ASN1 13 +# define ERR_LIB_CONF 14 +# define ERR_LIB_CRYPTO 15 +# define ERR_LIB_EC 16 +# define ERR_LIB_SSL 20 +/* #define ERR_LIB_SSL23 21 */ +/* #define ERR_LIB_SSL2 22 */ +/* #define ERR_LIB_SSL3 23 */ +/* #define ERR_LIB_RSAREF 30 */ +/* #define ERR_LIB_PROXY 31 */ +# define ERR_LIB_BIO 32 +# define ERR_LIB_PKCS7 33 +# define ERR_LIB_X509V3 34 +# define ERR_LIB_PKCS12 35 +# define ERR_LIB_RAND 36 +# define ERR_LIB_DSO 37 +# define ERR_LIB_ENGINE 38 +# define ERR_LIB_OCSP 39 +# define ERR_LIB_UI 40 +# define ERR_LIB_COMP 41 +# define ERR_LIB_ECDSA 42 +# define ERR_LIB_ECDH 43 +# define ERR_LIB_OSSL_STORE 44 +# define ERR_LIB_FIPS 45 +# define ERR_LIB_CMS 46 +# define ERR_LIB_TS 47 +# define ERR_LIB_HMAC 48 +/* # define ERR_LIB_JPAKE 49 */ +# define ERR_LIB_CT 50 +# define ERR_LIB_ASYNC 51 +# define ERR_LIB_KDF 52 +# define ERR_LIB_SM2 53 +# define ERR_LIB_ESS 54 +# define ERR_LIB_PROP 55 +# define ERR_LIB_CRMF 56 +# define ERR_LIB_PROV 57 +# define ERR_LIB_CMP 58 +# define ERR_LIB_OSSL_ENCODER 59 +# define ERR_LIB_OSSL_DECODER 60 +# define ERR_LIB_HTTP 61 + +# define ERR_LIB_USER 128 + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define ASN1err(f, r) ERR_raise_data(ERR_LIB_ASN1, (r), NULL) +# define ASYNCerr(f, r) ERR_raise_data(ERR_LIB_ASYNC, (r), NULL) +# define BIOerr(f, r) ERR_raise_data(ERR_LIB_BIO, (r), NULL) +# define BNerr(f, r) ERR_raise_data(ERR_LIB_BN, (r), NULL) +# define BUFerr(f, r) ERR_raise_data(ERR_LIB_BUF, (r), NULL) +# define CMPerr(f, r) ERR_raise_data(ERR_LIB_CMP, (r), NULL) +# define CMSerr(f, r) ERR_raise_data(ERR_LIB_CMS, (r), NULL) +# define COMPerr(f, r) ERR_raise_data(ERR_LIB_COMP, (r), NULL) +# define CONFerr(f, r) ERR_raise_data(ERR_LIB_CONF, (r), NULL) +# define CRMFerr(f, r) ERR_raise_data(ERR_LIB_CRMF, (r), NULL) +# define CRYPTOerr(f, r) ERR_raise_data(ERR_LIB_CRYPTO, (r), NULL) +# define CTerr(f, r) ERR_raise_data(ERR_LIB_CT, (r), NULL) +# define DHerr(f, r) 
ERR_raise_data(ERR_LIB_DH, (r), NULL) +# define DSAerr(f, r) ERR_raise_data(ERR_LIB_DSA, (r), NULL) +# define DSOerr(f, r) ERR_raise_data(ERR_LIB_DSO, (r), NULL) +# define ECDHerr(f, r) ERR_raise_data(ERR_LIB_ECDH, (r), NULL) +# define ECDSAerr(f, r) ERR_raise_data(ERR_LIB_ECDSA, (r), NULL) +# define ECerr(f, r) ERR_raise_data(ERR_LIB_EC, (r), NULL) +# define ENGINEerr(f, r) ERR_raise_data(ERR_LIB_ENGINE, (r), NULL) +# define ESSerr(f, r) ERR_raise_data(ERR_LIB_ESS, (r), NULL) +# define EVPerr(f, r) ERR_raise_data(ERR_LIB_EVP, (r), NULL) +# define FIPSerr(f, r) ERR_raise_data(ERR_LIB_FIPS, (r), NULL) +# define HMACerr(f, r) ERR_raise_data(ERR_LIB_HMAC, (r), NULL) +# define HTTPerr(f, r) ERR_raise_data(ERR_LIB_HTTP, (r), NULL) +# define KDFerr(f, r) ERR_raise_data(ERR_LIB_KDF, (r), NULL) +# define OBJerr(f, r) ERR_raise_data(ERR_LIB_OBJ, (r), NULL) +# define OCSPerr(f, r) ERR_raise_data(ERR_LIB_OCSP, (r), NULL) +# define OSSL_STOREerr(f, r) ERR_raise_data(ERR_LIB_OSSL_STORE, (r), NULL) +# define PEMerr(f, r) ERR_raise_data(ERR_LIB_PEM, (r), NULL) +# define PKCS12err(f, r) ERR_raise_data(ERR_LIB_PKCS12, (r), NULL) +# define PKCS7err(f, r) ERR_raise_data(ERR_LIB_PKCS7, (r), NULL) +# define PROPerr(f, r) ERR_raise_data(ERR_LIB_PROP, (r), NULL) +# define PROVerr(f, r) ERR_raise_data(ERR_LIB_PROV, (r), NULL) +# define RANDerr(f, r) ERR_raise_data(ERR_LIB_RAND, (r), NULL) +# define RSAerr(f, r) ERR_raise_data(ERR_LIB_RSA, (r), NULL) +# define KDFerr(f, r) ERR_raise_data(ERR_LIB_KDF, (r), NULL) +# define SM2err(f, r) ERR_raise_data(ERR_LIB_SM2, (r), NULL) +# define SSLerr(f, r) ERR_raise_data(ERR_LIB_SSL, (r), NULL) +# define SYSerr(f, r) ERR_raise_data(ERR_LIB_SYS, (r), NULL) +# define TSerr(f, r) ERR_raise_data(ERR_LIB_TS, (r), NULL) +# define UIerr(f, r) ERR_raise_data(ERR_LIB_UI, (r), NULL) +# define X509V3err(f, r) ERR_raise_data(ERR_LIB_X509V3, (r), NULL) +# define X509err(f, r) ERR_raise_data(ERR_LIB_X509, (r), NULL) +# endif + +/*- + * The error code packs differently depending on if it records a system + * error or an OpenSSL error. + * + * A system error packs like this (we follow POSIX and only allow positive + * numbers that fit in an |int|): + * + * +-+-------------------------------------------------------------+ + * |1| system error number | + * +-+-------------------------------------------------------------+ + * + * An OpenSSL error packs like this: + * + * <---------------------------- 32 bits --------------------------> + * <--- 8 bits ---><------------------ 23 bits -----------------> + * +-+---------------+---------------------------------------------+ + * |0| library | reason | + * +-+---------------+---------------------------------------------+ + * + * A few of the reason bits are reserved as flags with special meaning: + * + * <5 bits-<>--------- 19 bits -----------------> + * +-------+-+-----------------------------------+ + * | rflags| | reason | + * +-------+-+-----------------------------------+ + * ^ + * | + * ERR_RFLAG_FATAL = ERR_R_FATAL + * + * The reason flags are part of the overall reason code for practical + * reasons, as they provide an easy way to place different types of + * reason codes in different numeric ranges. + * + * The currently known reason flags are: + * + * ERR_RFLAG_FATAL Flags that the reason code is considered fatal. + * For backward compatibility reasons, this flag + * is also the code for ERR_R_FATAL (that reason + * code served the dual purpose of flag and reason + * code in one in pre-3.0 OpenSSL). 
+ * ERR_RFLAG_COMMON Flags that the reason code is common to all + * libraries. All ERR_R_ macros must use this flag, + * and no other _R_ macro is allowed to use it. + */ + +/* Macros to help decode recorded system errors */ +# define ERR_SYSTEM_FLAG ((unsigned int)INT_MAX + 1) +# define ERR_SYSTEM_MASK ((unsigned int)INT_MAX) + +/* + * Macros to help decode recorded OpenSSL errors + * As expressed above, RFLAGS and REASON overlap by one bit to allow + * ERR_R_FATAL to use ERR_RFLAG_FATAL as its reason code. + */ +# define ERR_LIB_OFFSET 23L +# define ERR_LIB_MASK 0xFF +# define ERR_RFLAGS_OFFSET 18L +# define ERR_RFLAGS_MASK 0x1F +# define ERR_REASON_MASK 0X7FFFFF + +/* + * Reason flags are defined pre-shifted to easily combine with the reason + * number. + */ +# define ERR_RFLAG_FATAL (0x1 << ERR_RFLAGS_OFFSET) +# define ERR_RFLAG_COMMON (0x2 << ERR_RFLAGS_OFFSET) + +# define ERR_SYSTEM_ERROR(errcode) (((errcode) & ERR_SYSTEM_FLAG) != 0) + +static ossl_unused ossl_inline int ERR_GET_LIB(unsigned long errcode) +{ + if (ERR_SYSTEM_ERROR(errcode)) + return ERR_LIB_SYS; + return (errcode >> ERR_LIB_OFFSET) & ERR_LIB_MASK; +} + +static ossl_unused ossl_inline int ERR_GET_RFLAGS(unsigned long errcode) +{ + if (ERR_SYSTEM_ERROR(errcode)) + return 0; + return errcode & (ERR_RFLAGS_MASK << ERR_RFLAGS_OFFSET); +} + +static ossl_unused ossl_inline int ERR_GET_REASON(unsigned long errcode) +{ + if (ERR_SYSTEM_ERROR(errcode)) + return errcode & ERR_SYSTEM_MASK; + return errcode & ERR_REASON_MASK; +} + +static ossl_unused ossl_inline int ERR_FATAL_ERROR(unsigned long errcode) +{ + return (ERR_GET_RFLAGS(errcode) & ERR_RFLAG_FATAL) != 0; +} + +static ossl_unused ossl_inline int ERR_COMMON_ERROR(unsigned long errcode) +{ + return (ERR_GET_RFLAGS(errcode) & ERR_RFLAG_COMMON) != 0; +} + +/* + * ERR_PACK is a helper macro to properly pack OpenSSL error codes and may + * only be used for that purpose. System errors are packed internally. + * ERR_PACK takes reason flags and reason code combined in |reason|. + * ERR_PACK ignores |func|, that parameter is just legacy from pre-3.0 OpenSSL. + */ +# define ERR_PACK(lib,func,reason) \ + ( (((unsigned long)(lib) & ERR_LIB_MASK ) << ERR_LIB_OFFSET) | \ + (((unsigned long)(reason) & ERR_REASON_MASK)) ) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SYS_F_FOPEN 0 +# define SYS_F_CONNECT 0 +# define SYS_F_GETSERVBYNAME 0 +# define SYS_F_SOCKET 0 +# define SYS_F_IOCTLSOCKET 0 +# define SYS_F_BIND 0 +# define SYS_F_LISTEN 0 +# define SYS_F_ACCEPT 0 +# define SYS_F_WSASTARTUP 0 +# define SYS_F_OPENDIR 0 +# define SYS_F_FREAD 0 +# define SYS_F_GETADDRINFO 0 +# define SYS_F_GETNAMEINFO 0 +# define SYS_F_SETSOCKOPT 0 +# define SYS_F_GETSOCKOPT 0 +# define SYS_F_GETSOCKNAME 0 +# define SYS_F_GETHOSTBYNAME 0 +# define SYS_F_FFLUSH 0 +# define SYS_F_OPEN 0 +# define SYS_F_CLOSE 0 +# define SYS_F_IOCTL 0 +# define SYS_F_STAT 0 +# define SYS_F_FCNTL 0 +# define SYS_F_FSTAT 0 +# define SYS_F_SENDFILE 0 +# endif + +/* + * All ERR_R_ codes must be combined with ERR_RFLAG_COMMON. 
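+ *
+ * Illustrative sketch (editor's addition, not part of the generated header):
+ * a packed error code pulled from the queue is decoded with the inline
+ * helpers declared above:
+ *
+ *   unsigned long e = ERR_get_error();
+ *   int lib    = ERR_GET_LIB(e);
+ *   int reason = ERR_GET_REASON(e);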
+ */ + +/* "we came from here" global reason codes, range 1..255 */ +# define ERR_R_SYS_LIB (ERR_LIB_SYS/* 2 */ | ERR_RFLAG_COMMON) +# define ERR_R_BN_LIB (ERR_LIB_BN/* 3 */ | ERR_RFLAG_COMMON) +# define ERR_R_RSA_LIB (ERR_LIB_RSA/* 4 */ | ERR_RFLAG_COMMON) +# define ERR_R_DH_LIB (ERR_LIB_DH/* 5 */ | ERR_RFLAG_COMMON) +# define ERR_R_EVP_LIB (ERR_LIB_EVP/* 6 */ | ERR_RFLAG_COMMON) +# define ERR_R_BUF_LIB (ERR_LIB_BUF/* 7 */ | ERR_RFLAG_COMMON) +# define ERR_R_OBJ_LIB (ERR_LIB_OBJ/* 8 */ | ERR_RFLAG_COMMON) +# define ERR_R_PEM_LIB (ERR_LIB_PEM/* 9 */ | ERR_RFLAG_COMMON) +# define ERR_R_DSA_LIB (ERR_LIB_DSA/* 10 */ | ERR_RFLAG_COMMON) +# define ERR_R_X509_LIB (ERR_LIB_X509/* 11 */ | ERR_RFLAG_COMMON) +# define ERR_R_ASN1_LIB (ERR_LIB_ASN1/* 13 */ | ERR_RFLAG_COMMON) +# define ERR_R_CONF_LIB (ERR_LIB_CONF/* 14 */ | ERR_RFLAG_COMMON) +# define ERR_R_CRYPTO_LIB (ERR_LIB_CRYPTO/* 15 */ | ERR_RFLAG_COMMON) +# define ERR_R_EC_LIB (ERR_LIB_EC/* 16 */ | ERR_RFLAG_COMMON) +# define ERR_R_SSL_LIB (ERR_LIB_SSL/* 20 */ | ERR_RFLAG_COMMON) +# define ERR_R_BIO_LIB (ERR_LIB_BIO/* 32 */ | ERR_RFLAG_COMMON) +# define ERR_R_PKCS7_LIB (ERR_LIB_PKCS7/* 33 */ | ERR_RFLAG_COMMON) +# define ERR_R_X509V3_LIB (ERR_LIB_X509V3/* 34 */ | ERR_RFLAG_COMMON) +# define ERR_R_PKCS12_LIB (ERR_LIB_PKCS12/* 35 */ | ERR_RFLAG_COMMON) +# define ERR_R_RAND_LIB (ERR_LIB_RAND/* 36 */ | ERR_RFLAG_COMMON) +# define ERR_R_DSO_LIB (ERR_LIB_DSO/* 37 */ | ERR_RFLAG_COMMON) +# define ERR_R_ENGINE_LIB (ERR_LIB_ENGINE/* 38 */ | ERR_RFLAG_COMMON) +# define ERR_R_UI_LIB (ERR_LIB_UI/* 40 */ | ERR_RFLAG_COMMON) +# define ERR_R_ECDSA_LIB (ERR_LIB_ECDSA/* 42 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_STORE_LIB (ERR_LIB_OSSL_STORE/* 44 */ | ERR_RFLAG_COMMON) +# define ERR_R_CMS_LIB (ERR_LIB_CMS/* 46 */ | ERR_RFLAG_COMMON) +# define ERR_R_TS_LIB (ERR_LIB_TS/* 47 */ | ERR_RFLAG_COMMON) +# define ERR_R_CT_LIB (ERR_LIB_CT/* 50 */ | ERR_RFLAG_COMMON) +# define ERR_R_PROV_LIB (ERR_LIB_PROV/* 57 */ | ERR_RFLAG_COMMON) +# define ERR_R_ESS_LIB (ERR_LIB_ESS/* 54 */ | ERR_RFLAG_COMMON) +# define ERR_R_CMP_LIB (ERR_LIB_CMP/* 58 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_ENCODER_LIB (ERR_LIB_OSSL_ENCODER/* 59 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_DECODER_LIB (ERR_LIB_OSSL_DECODER/* 60 */ | ERR_RFLAG_COMMON) + +/* Other common error codes, range 256..2^ERR_RFLAGS_OFFSET-1 */ +# define ERR_R_FATAL (ERR_RFLAG_FATAL|ERR_RFLAG_COMMON) +# define ERR_R_MALLOC_FAILURE (256|ERR_R_FATAL) +# define ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED (257|ERR_R_FATAL) +# define ERR_R_PASSED_NULL_PARAMETER (258|ERR_R_FATAL) +# define ERR_R_INTERNAL_ERROR (259|ERR_R_FATAL) +# define ERR_R_DISABLED (260|ERR_R_FATAL) +# define ERR_R_INIT_FAIL (261|ERR_R_FATAL) +# define ERR_R_PASSED_INVALID_ARGUMENT (262|ERR_RFLAG_COMMON) +# define ERR_R_OPERATION_FAIL (263|ERR_R_FATAL) +# define ERR_R_INVALID_PROVIDER_FUNCTIONS (264|ERR_R_FATAL) +# define ERR_R_INTERRUPTED_OR_CANCELLED (265|ERR_RFLAG_COMMON) +# define ERR_R_NESTED_ASN1_ERROR (266|ERR_RFLAG_COMMON) +# define ERR_R_MISSING_ASN1_EOS (267|ERR_RFLAG_COMMON) +# define ERR_R_UNSUPPORTED (268|ERR_RFLAG_COMMON) +# define ERR_R_FETCH_FAILED (269|ERR_RFLAG_COMMON) +# define ERR_R_INVALID_PROPERTY_DEFINITION (270|ERR_RFLAG_COMMON) +# define ERR_R_UNABLE_TO_GET_READ_LOCK (271|ERR_R_FATAL) +# define ERR_R_UNABLE_TO_GET_WRITE_LOCK (272|ERR_R_FATAL) + +typedef struct ERR_string_data_st { + unsigned long error; + const char *string; +} ERR_STRING_DATA; + +DEFINE_LHASH_OF_INTERNAL(ERR_STRING_DATA); +#define lh_ERR_STRING_DATA_new(hfn, cmp) 
((LHASH_OF(ERR_STRING_DATA) *)OPENSSL_LH_new(ossl_check_ERR_STRING_DATA_lh_hashfunc_type(hfn), ossl_check_ERR_STRING_DATA_lh_compfunc_type(cmp))) +#define lh_ERR_STRING_DATA_free(lh) OPENSSL_LH_free(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_flush(lh) OPENSSL_LH_flush(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_insert(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_insert(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_delete(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_delete(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_const_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_retrieve(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_retrieve(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_const_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_error(lh) OPENSSL_LH_error(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_num_items(lh) OPENSSL_LH_num_items(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_ERR_STRING_DATA_lh_type(lh), dl) +#define lh_ERR_STRING_DATA_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_ERR_STRING_DATA_lh_doallfunc_type(dfn)) + + +/* 12 lines and some on an 80 column terminal */ +#define ERR_MAX_DATA_SIZE 1024 + +/* Building blocks */ +void ERR_new(void); +void ERR_set_debug(const char *file, int line, const char *func); +void ERR_set_error(int lib, int reason, const char *fmt, ...); +void ERR_vset_error(int lib, int reason, const char *fmt, va_list args); + +/* Main error raising functions */ +# define ERR_raise(lib, reason) ERR_raise_data((lib),(reason),NULL) +# define ERR_raise_data \ + (ERR_new(), \ + ERR_set_debug(OPENSSL_FILE,OPENSSL_LINE,OPENSSL_FUNC), \ + ERR_set_error) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* Backward compatibility */ +# define ERR_put_error(lib, func, reason, file, line) \ + (ERR_new(), \ + ERR_set_debug((file), (line), OPENSSL_FUNC), \ + ERR_set_error((lib), (reason), NULL)) +# endif + +void ERR_set_error_data(char *data, int flags); + +unsigned long ERR_get_error(void); +unsigned long ERR_get_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_get_error_line(const char **file, int *line); +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_get_error_line_data(const char **file, int *line, + const char **data, int *flags); +#endif +unsigned long ERR_peek_error(void); +unsigned long ERR_peek_error_line(const char **file, int *line); +unsigned long ERR_peek_error_func(const char **func); +unsigned long ERR_peek_error_data(const char **data, int *flags); +unsigned long ERR_peek_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_peek_error_line_data(const char 
**file, int *line, + const char **data, int *flags); +# endif +unsigned long ERR_peek_last_error(void); +unsigned long ERR_peek_last_error_line(const char **file, int *line); +unsigned long ERR_peek_last_error_func(const char **func); +unsigned long ERR_peek_last_error_data(const char **data, int *flags); +unsigned long ERR_peek_last_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_peek_last_error_line_data(const char **file, int *line, + const char **data, int *flags); +# endif + +void ERR_clear_error(void); + +char *ERR_error_string(unsigned long e, char *buf); +void ERR_error_string_n(unsigned long e, char *buf, size_t len); +const char *ERR_lib_error_string(unsigned long e); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 const char *ERR_func_error_string(unsigned long e); +# endif +const char *ERR_reason_error_string(unsigned long e); + +void ERR_print_errors_cb(int (*cb) (const char *str, size_t len, void *u), + void *u); +# ifndef OPENSSL_NO_STDIO +void ERR_print_errors_fp(FILE *fp); +# endif +void ERR_print_errors(BIO *bp); + +void ERR_add_error_data(int num, ...); +void ERR_add_error_vdata(int num, va_list args); +void ERR_add_error_txt(const char *sepr, const char *txt); +void ERR_add_error_mem_bio(const char *sep, BIO *bio); + +int ERR_load_strings(int lib, ERR_STRING_DATA *str); +int ERR_load_strings_const(const ERR_STRING_DATA *str); +int ERR_unload_strings(int lib, ERR_STRING_DATA *str); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define ERR_load_crypto_strings() \ + OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL) +# define ERR_free_strings() while(0) continue +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void ERR_remove_thread_state(void *); +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_0_0 +OSSL_DEPRECATEDIN_1_0_0 void ERR_remove_state(unsigned long pid); +#endif +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 ERR_STATE *ERR_get_state(void); +#endif + +int ERR_get_next_error_library(void); + +int ERR_set_mark(void); +int ERR_pop_to_mark(void); +int ERR_clear_last_mark(void); +int ERR_count_to_mark(void); + +ERR_STATE *OSSL_ERR_STATE_new(void); +void OSSL_ERR_STATE_save(ERR_STATE *es); +void OSSL_ERR_STATE_save_to_mark(ERR_STATE *es); +void OSSL_ERR_STATE_restore(const ERR_STATE *es); +void OSSL_ERR_STATE_free(ERR_STATE *es); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ess.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ess.h new file mode 100644 index 00000000000..4055bebbea2 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ess.h @@ -0,0 +1,128 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ess.h.in + * + * Copyright 2019-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ESS_H +# define OPENSSL_ESS_H +# pragma once + +# include + +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + + +typedef struct ESS_issuer_serial ESS_ISSUER_SERIAL; +typedef struct ESS_cert_id ESS_CERT_ID; +typedef struct ESS_signing_cert ESS_SIGNING_CERT; + +SKM_DEFINE_STACK_OF_INTERNAL(ESS_CERT_ID, ESS_CERT_ID, ESS_CERT_ID) +#define sk_ESS_CERT_ID_num(sk) OPENSSL_sk_num(ossl_check_const_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_value(sk, idx) ((ESS_CERT_ID *)OPENSSL_sk_value(ossl_check_const_ESS_CERT_ID_sk_type(sk), (idx))) +#define sk_ESS_CERT_ID_new(cmp) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new(ossl_check_ESS_CERT_ID_compfunc_type(cmp))) +#define sk_ESS_CERT_ID_new_null() ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new_null()) +#define sk_ESS_CERT_ID_new_reserve(cmp, n) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new_reserve(ossl_check_ESS_CERT_ID_compfunc_type(cmp), (n))) +#define sk_ESS_CERT_ID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ESS_CERT_ID_sk_type(sk), (n)) +#define sk_ESS_CERT_ID_free(sk) OPENSSL_sk_free(ossl_check_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_zero(sk) OPENSSL_sk_zero(ossl_check_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_delete(sk, i) ((ESS_CERT_ID *)OPENSSL_sk_delete(ossl_check_ESS_CERT_ID_sk_type(sk), (i))) +#define sk_ESS_CERT_ID_delete_ptr(sk, ptr) ((ESS_CERT_ID *)OPENSSL_sk_delete_ptr(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr))) +#define sk_ESS_CERT_ID_push(sk, ptr) OPENSSL_sk_push(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_pop(sk) ((ESS_CERT_ID *)OPENSSL_sk_pop(ossl_check_ESS_CERT_ID_sk_type(sk))) +#define sk_ESS_CERT_ID_shift(sk) ((ESS_CERT_ID *)OPENSSL_sk_shift(ossl_check_ESS_CERT_ID_sk_type(sk))) +#define sk_ESS_CERT_ID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ESS_CERT_ID_sk_type(sk),ossl_check_ESS_CERT_ID_freefunc_type(freefunc)) +#define sk_ESS_CERT_ID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr), (idx)) +#define sk_ESS_CERT_ID_set(sk, idx, ptr) ((ESS_CERT_ID *)OPENSSL_sk_set(ossl_check_ESS_CERT_ID_sk_type(sk), (idx), ossl_check_ESS_CERT_ID_type(ptr))) +#define sk_ESS_CERT_ID_find(sk, ptr) OPENSSL_sk_find(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr), pnum) +#define sk_ESS_CERT_ID_sort(sk) OPENSSL_sk_sort(ossl_check_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_dup(sk) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_dup(ossl_check_const_ESS_CERT_ID_sk_type(sk))) +#define sk_ESS_CERT_ID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_deep_copy(ossl_check_const_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_copyfunc_type(copyfunc), ossl_check_ESS_CERT_ID_freefunc_type(freefunc))) +#define sk_ESS_CERT_ID_set_cmp_func(sk, cmp) 
((sk_ESS_CERT_ID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_compfunc_type(cmp))) + + + +typedef struct ESS_signing_cert_v2_st ESS_SIGNING_CERT_V2; +typedef struct ESS_cert_id_v2_st ESS_CERT_ID_V2; + +SKM_DEFINE_STACK_OF_INTERNAL(ESS_CERT_ID_V2, ESS_CERT_ID_V2, ESS_CERT_ID_V2) +#define sk_ESS_CERT_ID_V2_num(sk) OPENSSL_sk_num(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_value(sk, idx) ((ESS_CERT_ID_V2 *)OPENSSL_sk_value(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk), (idx))) +#define sk_ESS_CERT_ID_V2_new(cmp) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new(ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp))) +#define sk_ESS_CERT_ID_V2_new_null() ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new_null()) +#define sk_ESS_CERT_ID_V2_new_reserve(cmp, n) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new_reserve(ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp), (n))) +#define sk_ESS_CERT_ID_V2_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (n)) +#define sk_ESS_CERT_ID_V2_free(sk) OPENSSL_sk_free(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_zero(sk) OPENSSL_sk_zero(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_delete(sk, i) ((ESS_CERT_ID_V2 *)OPENSSL_sk_delete(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (i))) +#define sk_ESS_CERT_ID_V2_delete_ptr(sk, ptr) ((ESS_CERT_ID_V2 *)OPENSSL_sk_delete_ptr(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr))) +#define sk_ESS_CERT_ID_V2_push(sk, ptr) OPENSSL_sk_push(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_pop(sk) ((ESS_CERT_ID_V2 *)OPENSSL_sk_pop(ossl_check_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_shift(sk) ((ESS_CERT_ID_V2 *)OPENSSL_sk_shift(ossl_check_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ESS_CERT_ID_V2_sk_type(sk),ossl_check_ESS_CERT_ID_V2_freefunc_type(freefunc)) +#define sk_ESS_CERT_ID_V2_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr), (idx)) +#define sk_ESS_CERT_ID_V2_set(sk, idx, ptr) ((ESS_CERT_ID_V2 *)OPENSSL_sk_set(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (idx), ossl_check_ESS_CERT_ID_V2_type(ptr))) +#define sk_ESS_CERT_ID_V2_find(sk, ptr) OPENSSL_sk_find(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr), pnum) +#define sk_ESS_CERT_ID_V2_sort(sk) OPENSSL_sk_sort(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_dup(sk) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_dup(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_deep_copy(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_copyfunc_type(copyfunc), ossl_check_ESS_CERT_ID_V2_freefunc_type(freefunc))) +#define sk_ESS_CERT_ID_V2_set_cmp_func(sk, cmp) 
((sk_ESS_CERT_ID_V2_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp))) + + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_ISSUER_SERIAL) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_ISSUER_SERIAL, ESS_ISSUER_SERIAL) +DECLARE_ASN1_DUP_FUNCTION(ESS_ISSUER_SERIAL) + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_CERT_ID) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_CERT_ID, ESS_CERT_ID) +DECLARE_ASN1_DUP_FUNCTION(ESS_CERT_ID) + +DECLARE_ASN1_FUNCTIONS(ESS_SIGNING_CERT) +DECLARE_ASN1_DUP_FUNCTION(ESS_SIGNING_CERT) + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_CERT_ID_V2) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_CERT_ID_V2, ESS_CERT_ID_V2) +DECLARE_ASN1_DUP_FUNCTION(ESS_CERT_ID_V2) + +DECLARE_ASN1_FUNCTIONS(ESS_SIGNING_CERT_V2) +DECLARE_ASN1_DUP_FUNCTION(ESS_SIGNING_CERT_V2) + +ESS_SIGNING_CERT *OSSL_ESS_signing_cert_new_init(const X509 *signcert, + const STACK_OF(X509) *certs, + int set_issuer_serial); +ESS_SIGNING_CERT_V2 *OSSL_ESS_signing_cert_v2_new_init(const EVP_MD *hash_alg, + const X509 *signcert, + const + STACK_OF(X509) *certs, + int set_issuer_serial); +int OSSL_ESS_check_signing_certs(const ESS_SIGNING_CERT *ss, + const ESS_SIGNING_CERT_V2 *ssv2, + const STACK_OF(X509) *chain, + int require_signing_cert); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/fipskey.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/fipskey.h new file mode 100644 index 00000000000..42ba014b313 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/fipskey.h @@ -0,0 +1,36 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/fipskey.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_FIPSKEY_H +# define OPENSSL_FIPSKEY_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * The FIPS validation HMAC key, usable as an array initializer. + */ +#define FIPS_KEY_ELEMENTS \ + 0xf4, 0x55, 0x66, 0x50, 0xac, 0x31, 0xd3, 0x54, 0x61, 0x61, 0x0b, 0xac, 0x4e, 0xd8, 0x1b, 0x1a, 0x18, 0x1b, 0x2d, 0x8a, 0x43, 0xea, 0x28, 0x54, 0xcb, 0xae, 0x22, 0xca, 0x74, 0x56, 0x08, 0x13 + +/* + * The FIPS validation key, as a string. + */ +#define FIPS_KEY_STRING "f4556650ac31d35461610bac4ed81b1a181b2d8a43ea2854cbae22ca74560813" + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/lhash.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/lhash.h new file mode 100644 index 00000000000..8af9edd24a8 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/lhash.h @@ -0,0 +1,331 @@ +/* + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +/* + * Header for dynamic hash table routines Author - Eric Young + */ + +#ifndef OPENSSL_LHASH_H +# define OPENSSL_LHASH_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_LHASH_H +# endif + +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct lhash_node_st OPENSSL_LH_NODE; +typedef int (*OPENSSL_LH_COMPFUNC) (const void *, const void *); +typedef unsigned long (*OPENSSL_LH_HASHFUNC) (const void *); +typedef void (*OPENSSL_LH_DOALL_FUNC) (void *); +typedef void (*OPENSSL_LH_DOALL_FUNCARG) (void *, void *); +typedef struct lhash_st OPENSSL_LHASH; + +/* + * Macros for declaring and implementing type-safe wrappers for LHASH + * callbacks. This way, callbacks can be provided to LHASH structures without + * function pointer casting and the macro-defined callbacks provide + * per-variable casting before deferring to the underlying type-specific + * callbacks. NB: It is possible to place a "static" in front of both the + * DECLARE and IMPLEMENT macros if the functions are strictly internal. + */ + +/* First: "hash" functions */ +# define DECLARE_LHASH_HASH_FN(name, o_type) \ + unsigned long name##_LHASH_HASH(const void *); +# define IMPLEMENT_LHASH_HASH_FN(name, o_type) \ + unsigned long name##_LHASH_HASH(const void *arg) { \ + const o_type *a = arg; \ + return name##_hash(a); } +# define LHASH_HASH_FN(name) name##_LHASH_HASH + +/* Second: "compare" functions */ +# define DECLARE_LHASH_COMP_FN(name, o_type) \ + int name##_LHASH_COMP(const void *, const void *); +# define IMPLEMENT_LHASH_COMP_FN(name, o_type) \ + int name##_LHASH_COMP(const void *arg1, const void *arg2) { \ + const o_type *a = arg1; \ + const o_type *b = arg2; \ + return name##_cmp(a,b); } +# define LHASH_COMP_FN(name) name##_LHASH_COMP + +/* Fourth: "doall_arg" functions */ +# define DECLARE_LHASH_DOALL_ARG_FN(name, o_type, a_type) \ + void name##_LHASH_DOALL_ARG(void *, void *); +# define IMPLEMENT_LHASH_DOALL_ARG_FN(name, o_type, a_type) \ + void name##_LHASH_DOALL_ARG(void *arg1, void *arg2) { \ + o_type *a = arg1; \ + a_type *b = arg2; \ + name##_doall_arg(a, b); } +# define LHASH_DOALL_ARG_FN(name) name##_LHASH_DOALL_ARG + + +# define LH_LOAD_MULT 256 + +int OPENSSL_LH_error(OPENSSL_LHASH *lh); +OPENSSL_LHASH *OPENSSL_LH_new(OPENSSL_LH_HASHFUNC h, OPENSSL_LH_COMPFUNC c); +void OPENSSL_LH_free(OPENSSL_LHASH *lh); +void OPENSSL_LH_flush(OPENSSL_LHASH *lh); +void *OPENSSL_LH_insert(OPENSSL_LHASH *lh, void *data); +void *OPENSSL_LH_delete(OPENSSL_LHASH *lh, const void *data); +void *OPENSSL_LH_retrieve(OPENSSL_LHASH *lh, const void *data); +void OPENSSL_LH_doall(OPENSSL_LHASH *lh, OPENSSL_LH_DOALL_FUNC func); +void OPENSSL_LH_doall_arg(OPENSSL_LHASH *lh, OPENSSL_LH_DOALL_FUNCARG func, void *arg); +unsigned long OPENSSL_LH_strhash(const char *c); +unsigned long OPENSSL_LH_num_items(const OPENSSL_LHASH *lh); +unsigned long OPENSSL_LH_get_down_load(const OPENSSL_LHASH *lh); +void OPENSSL_LH_set_down_load(OPENSSL_LHASH *lh, unsigned long down_load); + +# ifndef OPENSSL_NO_STDIO +# ifndef OPENSSL_NO_DEPRECATED_3_1 +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats(const OPENSSL_LHASH *lh, FILE *fp); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats(const OPENSSL_LHASH *lh, FILE *fp); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats(const OPENSSL_LHASH *lh, FILE *fp); +# endif +# endif +# 
ifndef OPENSSL_NO_DEPRECATED_3_1 +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +# endif + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define _LHASH OPENSSL_LHASH +# define LHASH_NODE OPENSSL_LH_NODE +# define lh_error OPENSSL_LH_error +# define lh_new OPENSSL_LH_new +# define lh_free OPENSSL_LH_free +# define lh_insert OPENSSL_LH_insert +# define lh_delete OPENSSL_LH_delete +# define lh_retrieve OPENSSL_LH_retrieve +# define lh_doall OPENSSL_LH_doall +# define lh_doall_arg OPENSSL_LH_doall_arg +# define lh_strhash OPENSSL_LH_strhash +# define lh_num_items OPENSSL_LH_num_items +# ifndef OPENSSL_NO_STDIO +# define lh_stats OPENSSL_LH_stats +# define lh_node_stats OPENSSL_LH_node_stats +# define lh_node_usage_stats OPENSSL_LH_node_usage_stats +# endif +# define lh_stats_bio OPENSSL_LH_stats_bio +# define lh_node_stats_bio OPENSSL_LH_node_stats_bio +# define lh_node_usage_stats_bio OPENSSL_LH_node_usage_stats_bio +# endif + +/* Type checking... */ + +# define LHASH_OF(type) struct lhash_st_##type + +/* Helper macro for internal use */ +# define DEFINE_LHASH_OF_INTERNAL(type) \ + LHASH_OF(type) { \ + union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ + }; \ + typedef int (*lh_##type##_compfunc)(const type *a, const type *b); \ + typedef unsigned long (*lh_##type##_hashfunc)(const type *a); \ + typedef void (*lh_##type##_doallfunc)(type *a); \ + static ossl_unused ossl_inline type *\ + ossl_check_##type##_lh_plain_type(type *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const type * \ + ossl_check_const_##type##_lh_plain_type(const type *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const OPENSSL_LHASH * \ + ossl_check_const_##type##_lh_type(const LHASH_OF(type) *lh) \ + { \ + return (const OPENSSL_LHASH *)lh; \ + } \ + static ossl_unused ossl_inline OPENSSL_LHASH * \ + ossl_check_##type##_lh_type(LHASH_OF(type) *lh) \ + { \ + return (OPENSSL_LHASH *)lh; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_COMPFUNC \ + ossl_check_##type##_lh_compfunc_type(lh_##type##_compfunc cmp) \ + { \ + return (OPENSSL_LH_COMPFUNC)cmp; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_HASHFUNC \ + ossl_check_##type##_lh_hashfunc_type(lh_##type##_hashfunc hfn) \ + { \ + return (OPENSSL_LH_HASHFUNC)hfn; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_DOALL_FUNC \ + ossl_check_##type##_lh_doallfunc_type(lh_##type##_doallfunc dfn) \ + { \ + return (OPENSSL_LH_DOALL_FUNC)dfn; \ + } \ + LHASH_OF(type) + +# ifndef OPENSSL_NO_DEPRECATED_3_1 +# define DEFINE_LHASH_OF_DEPRECATED(type) \ + static ossl_unused ossl_inline void \ + lh_##type##_node_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_node_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_node_usage_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_node_usage_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } +# else +# define DEFINE_LHASH_OF_DEPRECATED(type) +# endif + +# define DEFINE_LHASH_OF_EX(type) \ + LHASH_OF(type) { \ + union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ + }; \ + static 
ossl_unused ossl_inline LHASH_OF(type) * \ + lh_##type##_new(unsigned long (*hfn)(const type *), \ + int (*cfn)(const type *, const type *)) \ + { \ + return (LHASH_OF(type) *) \ + OPENSSL_LH_new((OPENSSL_LH_HASHFUNC)hfn, (OPENSSL_LH_COMPFUNC)cfn); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_free(LHASH_OF(type) *lh) \ + { \ + OPENSSL_LH_free((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_flush(LHASH_OF(type) *lh) \ + { \ + OPENSSL_LH_flush((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_insert(LHASH_OF(type) *lh, type *d) \ + { \ + return (type *)OPENSSL_LH_insert((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_delete(LHASH_OF(type) *lh, const type *d) \ + { \ + return (type *)OPENSSL_LH_delete((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_retrieve(LHASH_OF(type) *lh, const type *d) \ + { \ + return (type *)OPENSSL_LH_retrieve((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline int \ + lh_##type##_error(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_error((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline unsigned long \ + lh_##type##_num_items(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_num_items((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline unsigned long \ + lh_##type##_get_down_load(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_get_down_load((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_set_down_load(LHASH_OF(type) *lh, unsigned long dl) \ + { \ + OPENSSL_LH_set_down_load((OPENSSL_LHASH *)lh, dl); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall(LHASH_OF(type) *lh, void (*doall)(type *)) \ + { \ + OPENSSL_LH_doall((OPENSSL_LHASH *)lh, (OPENSSL_LH_DOALL_FUNC)doall); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_arg(LHASH_OF(type) *lh, \ + void (*doallarg)(type *, void *), void *arg) \ + { \ + OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, \ + (OPENSSL_LH_DOALL_FUNCARG)doallarg, arg); \ + } \ + LHASH_OF(type) + +# define DEFINE_LHASH_OF(type) \ + DEFINE_LHASH_OF_EX(type); \ + DEFINE_LHASH_OF_DEPRECATED(type) \ + LHASH_OF(type) + +#define IMPLEMENT_LHASH_DOALL_ARG_CONST(type, argtype) \ + int_implement_lhash_doall(type, argtype, const type) + +#define IMPLEMENT_LHASH_DOALL_ARG(type, argtype) \ + int_implement_lhash_doall(type, argtype, type) + +#define int_implement_lhash_doall(type, argtype, cbargtype) \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_##argtype(LHASH_OF(type) *lh, \ + void (*fn)(cbargtype *, argtype *), \ + argtype *arg) \ + { \ + OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, \ + (OPENSSL_LH_DOALL_FUNCARG)fn, (void *)arg); \ + } \ + LHASH_OF(type) + +DEFINE_LHASH_OF_INTERNAL(OPENSSL_STRING); +#define lh_OPENSSL_STRING_new(hfn, cmp) ((LHASH_OF(OPENSSL_STRING) *)OPENSSL_LH_new(ossl_check_OPENSSL_STRING_lh_hashfunc_type(hfn), ossl_check_OPENSSL_STRING_lh_compfunc_type(cmp))) +#define lh_OPENSSL_STRING_free(lh) OPENSSL_LH_free(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_flush(lh) OPENSSL_LH_flush(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_insert(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_insert(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_delete(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_delete(ossl_check_OPENSSL_STRING_lh_type(lh), 
ossl_check_const_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_retrieve(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_retrieve(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_const_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_error(lh) OPENSSL_LH_error(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_num_items(lh) OPENSSL_LH_num_items(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_OPENSSL_STRING_lh_type(lh), dl) +#define lh_OPENSSL_STRING_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_OPENSSL_STRING_lh_doallfunc_type(dfn)) +DEFINE_LHASH_OF_INTERNAL(OPENSSL_CSTRING); +#define lh_OPENSSL_CSTRING_new(hfn, cmp) ((LHASH_OF(OPENSSL_CSTRING) *)OPENSSL_LH_new(ossl_check_OPENSSL_CSTRING_lh_hashfunc_type(hfn), ossl_check_OPENSSL_CSTRING_lh_compfunc_type(cmp))) +#define lh_OPENSSL_CSTRING_free(lh) OPENSSL_LH_free(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_flush(lh) OPENSSL_LH_flush(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_insert(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_insert(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_delete(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_delete(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_const_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_retrieve(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_retrieve(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_const_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_error(lh) OPENSSL_LH_error(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_num_items(lh) OPENSSL_LH_num_items(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_OPENSSL_CSTRING_lh_type(lh), dl) +#define lh_OPENSSL_CSTRING_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_OPENSSL_CSTRING_lh_doallfunc_type(dfn)) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ocsp.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ocsp.h new file mode 100644 index 00000000000..142b183140b --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ocsp.h @@ -0,0 +1,483 @@ +/* + * WARNING: do not edit! 
+ * Generated by Makefile from include/openssl/ocsp.h.in + * + * Copyright 2000-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_OCSP_H +# define OPENSSL_OCSP_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_OCSP_H +# endif + +# include +# include +# include + +/* + * These definitions are outside the OPENSSL_NO_OCSP guard because although for + * historical reasons they have OCSP_* names, they can actually be used + * independently of OCSP. E.g. see RFC5280 + */ +/*- + * CRLReason ::= ENUMERATED { + * unspecified (0), + * keyCompromise (1), + * cACompromise (2), + * affiliationChanged (3), + * superseded (4), + * cessationOfOperation (5), + * certificateHold (6), + * -- value 7 is not used + * removeFromCRL (8), + * privilegeWithdrawn (9), + * aACompromise (10) } + */ +# define OCSP_REVOKED_STATUS_NOSTATUS -1 +# define OCSP_REVOKED_STATUS_UNSPECIFIED 0 +# define OCSP_REVOKED_STATUS_KEYCOMPROMISE 1 +# define OCSP_REVOKED_STATUS_CACOMPROMISE 2 +# define OCSP_REVOKED_STATUS_AFFILIATIONCHANGED 3 +# define OCSP_REVOKED_STATUS_SUPERSEDED 4 +# define OCSP_REVOKED_STATUS_CESSATIONOFOPERATION 5 +# define OCSP_REVOKED_STATUS_CERTIFICATEHOLD 6 +# define OCSP_REVOKED_STATUS_REMOVEFROMCRL 8 +# define OCSP_REVOKED_STATUS_PRIVILEGEWITHDRAWN 9 +# define OCSP_REVOKED_STATUS_AACOMPROMISE 10 + + +# ifndef OPENSSL_NO_OCSP + +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +/* Various flags and values */ + +# define OCSP_DEFAULT_NONCE_LENGTH 16 + +# define OCSP_NOCERTS 0x1 +# define OCSP_NOINTERN 0x2 +# define OCSP_NOSIGS 0x4 +# define OCSP_NOCHAIN 0x8 +# define OCSP_NOVERIFY 0x10 +# define OCSP_NOEXPLICIT 0x20 +# define OCSP_NOCASIGN 0x40 +# define OCSP_NODELEGATED 0x80 +# define OCSP_NOCHECKS 0x100 +# define OCSP_TRUSTOTHER 0x200 +# define OCSP_RESPID_KEY 0x400 +# define OCSP_NOTIME 0x800 +# define OCSP_PARTIAL_CHAIN 0x1000 + +typedef struct ocsp_cert_id_st OCSP_CERTID; +typedef struct ocsp_one_request_st OCSP_ONEREQ; +typedef struct ocsp_req_info_st OCSP_REQINFO; +typedef struct ocsp_signature_st OCSP_SIGNATURE; +typedef struct ocsp_request_st OCSP_REQUEST; + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_CERTID, OCSP_CERTID, OCSP_CERTID) +#define sk_OCSP_CERTID_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_value(sk, idx) ((OCSP_CERTID *)OPENSSL_sk_value(ossl_check_const_OCSP_CERTID_sk_type(sk), (idx))) +#define sk_OCSP_CERTID_new(cmp) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new(ossl_check_OCSP_CERTID_compfunc_type(cmp))) +#define sk_OCSP_CERTID_new_null() ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new_null()) +#define sk_OCSP_CERTID_new_reserve(cmp, n) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_CERTID_compfunc_type(cmp), (n))) +#define sk_OCSP_CERTID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_CERTID_sk_type(sk), (n)) +#define sk_OCSP_CERTID_free(sk) OPENSSL_sk_free(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_delete(sk, i) ((OCSP_CERTID *)OPENSSL_sk_delete(ossl_check_OCSP_CERTID_sk_type(sk), (i))) +#define sk_OCSP_CERTID_delete_ptr(sk, ptr) ((OCSP_CERTID 
*)OPENSSL_sk_delete_ptr(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr))) +#define sk_OCSP_CERTID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_pop(sk) ((OCSP_CERTID *)OPENSSL_sk_pop(ossl_check_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_shift(sk) ((OCSP_CERTID *)OPENSSL_sk_shift(ossl_check_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_CERTID_sk_type(sk),ossl_check_OCSP_CERTID_freefunc_type(freefunc)) +#define sk_OCSP_CERTID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr), (idx)) +#define sk_OCSP_CERTID_set(sk, idx, ptr) ((OCSP_CERTID *)OPENSSL_sk_set(ossl_check_OCSP_CERTID_sk_type(sk), (idx), ossl_check_OCSP_CERTID_type(ptr))) +#define sk_OCSP_CERTID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr), pnum) +#define sk_OCSP_CERTID_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_dup(sk) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_dup(ossl_check_const_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_copyfunc_type(copyfunc), ossl_check_OCSP_CERTID_freefunc_type(freefunc))) +#define sk_OCSP_CERTID_set_cmp_func(sk, cmp) ((sk_OCSP_CERTID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_ONEREQ, OCSP_ONEREQ, OCSP_ONEREQ) +#define sk_OCSP_ONEREQ_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_value(sk, idx) ((OCSP_ONEREQ *)OPENSSL_sk_value(ossl_check_const_OCSP_ONEREQ_sk_type(sk), (idx))) +#define sk_OCSP_ONEREQ_new(cmp) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new(ossl_check_OCSP_ONEREQ_compfunc_type(cmp))) +#define sk_OCSP_ONEREQ_new_null() ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new_null()) +#define sk_OCSP_ONEREQ_new_reserve(cmp, n) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_ONEREQ_compfunc_type(cmp), (n))) +#define sk_OCSP_ONEREQ_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_ONEREQ_sk_type(sk), (n)) +#define sk_OCSP_ONEREQ_free(sk) OPENSSL_sk_free(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_delete(sk, i) ((OCSP_ONEREQ *)OPENSSL_sk_delete(ossl_check_OCSP_ONEREQ_sk_type(sk), (i))) +#define sk_OCSP_ONEREQ_delete_ptr(sk, ptr) ((OCSP_ONEREQ *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr))) +#define sk_OCSP_ONEREQ_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define 
sk_OCSP_ONEREQ_pop(sk) ((OCSP_ONEREQ *)OPENSSL_sk_pop(ossl_check_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_shift(sk) ((OCSP_ONEREQ *)OPENSSL_sk_shift(ossl_check_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_ONEREQ_sk_type(sk),ossl_check_OCSP_ONEREQ_freefunc_type(freefunc)) +#define sk_OCSP_ONEREQ_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr), (idx)) +#define sk_OCSP_ONEREQ_set(sk, idx, ptr) ((OCSP_ONEREQ *)OPENSSL_sk_set(ossl_check_OCSP_ONEREQ_sk_type(sk), (idx), ossl_check_OCSP_ONEREQ_type(ptr))) +#define sk_OCSP_ONEREQ_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr), pnum) +#define sk_OCSP_ONEREQ_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_dup(sk) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_dup(ossl_check_const_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_copyfunc_type(copyfunc), ossl_check_OCSP_ONEREQ_freefunc_type(freefunc))) +#define sk_OCSP_ONEREQ_set_cmp_func(sk, cmp) ((sk_OCSP_ONEREQ_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_compfunc_type(cmp))) + + +# define OCSP_RESPONSE_STATUS_SUCCESSFUL 0 +# define OCSP_RESPONSE_STATUS_MALFORMEDREQUEST 1 +# define OCSP_RESPONSE_STATUS_INTERNALERROR 2 +# define OCSP_RESPONSE_STATUS_TRYLATER 3 +# define OCSP_RESPONSE_STATUS_SIGREQUIRED 5 +# define OCSP_RESPONSE_STATUS_UNAUTHORIZED 6 + +typedef struct ocsp_resp_bytes_st OCSP_RESPBYTES; + +# define V_OCSP_RESPID_NAME 0 +# define V_OCSP_RESPID_KEY 1 + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_RESPID, OCSP_RESPID, OCSP_RESPID) +#define sk_OCSP_RESPID_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_value(sk, idx) ((OCSP_RESPID *)OPENSSL_sk_value(ossl_check_const_OCSP_RESPID_sk_type(sk), (idx))) +#define sk_OCSP_RESPID_new(cmp) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new(ossl_check_OCSP_RESPID_compfunc_type(cmp))) +#define sk_OCSP_RESPID_new_null() ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new_null()) +#define sk_OCSP_RESPID_new_reserve(cmp, n) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_RESPID_compfunc_type(cmp), (n))) +#define sk_OCSP_RESPID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_RESPID_sk_type(sk), (n)) +#define sk_OCSP_RESPID_free(sk) OPENSSL_sk_free(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_delete(sk, i) ((OCSP_RESPID *)OPENSSL_sk_delete(ossl_check_OCSP_RESPID_sk_type(sk), (i))) +#define sk_OCSP_RESPID_delete_ptr(sk, ptr) ((OCSP_RESPID *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr))) +#define sk_OCSP_RESPID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_RESPID_sk_type(sk), 
ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_pop(sk) ((OCSP_RESPID *)OPENSSL_sk_pop(ossl_check_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_shift(sk) ((OCSP_RESPID *)OPENSSL_sk_shift(ossl_check_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_RESPID_sk_type(sk),ossl_check_OCSP_RESPID_freefunc_type(freefunc)) +#define sk_OCSP_RESPID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr), (idx)) +#define sk_OCSP_RESPID_set(sk, idx, ptr) ((OCSP_RESPID *)OPENSSL_sk_set(ossl_check_OCSP_RESPID_sk_type(sk), (idx), ossl_check_OCSP_RESPID_type(ptr))) +#define sk_OCSP_RESPID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr), pnum) +#define sk_OCSP_RESPID_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_dup(sk) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_dup(ossl_check_const_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_copyfunc_type(copyfunc), ossl_check_OCSP_RESPID_freefunc_type(freefunc))) +#define sk_OCSP_RESPID_set_cmp_func(sk, cmp) ((sk_OCSP_RESPID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_compfunc_type(cmp))) + + +typedef struct ocsp_revoked_info_st OCSP_REVOKEDINFO; + +# define V_OCSP_CERTSTATUS_GOOD 0 +# define V_OCSP_CERTSTATUS_REVOKED 1 +# define V_OCSP_CERTSTATUS_UNKNOWN 2 + +typedef struct ocsp_cert_status_st OCSP_CERTSTATUS; +typedef struct ocsp_single_response_st OCSP_SINGLERESP; + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_SINGLERESP, OCSP_SINGLERESP, OCSP_SINGLERESP) +#define sk_OCSP_SINGLERESP_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_value(sk, idx) ((OCSP_SINGLERESP *)OPENSSL_sk_value(ossl_check_const_OCSP_SINGLERESP_sk_type(sk), (idx))) +#define sk_OCSP_SINGLERESP_new(cmp) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new(ossl_check_OCSP_SINGLERESP_compfunc_type(cmp))) +#define sk_OCSP_SINGLERESP_new_null() ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new_null()) +#define sk_OCSP_SINGLERESP_new_reserve(cmp, n) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_SINGLERESP_compfunc_type(cmp), (n))) +#define sk_OCSP_SINGLERESP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_SINGLERESP_sk_type(sk), (n)) +#define sk_OCSP_SINGLERESP_free(sk) OPENSSL_sk_free(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_delete(sk, i) ((OCSP_SINGLERESP *)OPENSSL_sk_delete(ossl_check_OCSP_SINGLERESP_sk_type(sk), (i))) +#define sk_OCSP_SINGLERESP_delete_ptr(sk, ptr) ((OCSP_SINGLERESP *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr))) +#define sk_OCSP_SINGLERESP_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_unshift(sk, ptr) 
OPENSSL_sk_unshift(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_pop(sk) ((OCSP_SINGLERESP *)OPENSSL_sk_pop(ossl_check_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_shift(sk) ((OCSP_SINGLERESP *)OPENSSL_sk_shift(ossl_check_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_SINGLERESP_sk_type(sk),ossl_check_OCSP_SINGLERESP_freefunc_type(freefunc)) +#define sk_OCSP_SINGLERESP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr), (idx)) +#define sk_OCSP_SINGLERESP_set(sk, idx, ptr) ((OCSP_SINGLERESP *)OPENSSL_sk_set(ossl_check_OCSP_SINGLERESP_sk_type(sk), (idx), ossl_check_OCSP_SINGLERESP_type(ptr))) +#define sk_OCSP_SINGLERESP_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr), pnum) +#define sk_OCSP_SINGLERESP_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_dup(sk) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_dup(ossl_check_const_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_copyfunc_type(copyfunc), ossl_check_OCSP_SINGLERESP_freefunc_type(freefunc))) +#define sk_OCSP_SINGLERESP_set_cmp_func(sk, cmp) ((sk_OCSP_SINGLERESP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_compfunc_type(cmp))) + + +typedef struct ocsp_response_data_st OCSP_RESPDATA; + +typedef struct ocsp_basic_response_st OCSP_BASICRESP; + +typedef struct ocsp_crl_id_st OCSP_CRLID; +typedef struct ocsp_service_locator_st OCSP_SERVICELOC; + +# define PEM_STRING_OCSP_REQUEST "OCSP REQUEST" +# define PEM_STRING_OCSP_RESPONSE "OCSP RESPONSE" + +# define d2i_OCSP_REQUEST_bio(bp,p) ASN1_d2i_bio_of(OCSP_REQUEST,OCSP_REQUEST_new,d2i_OCSP_REQUEST,bp,p) + +# define d2i_OCSP_RESPONSE_bio(bp,p) ASN1_d2i_bio_of(OCSP_RESPONSE,OCSP_RESPONSE_new,d2i_OCSP_RESPONSE,bp,p) + +# define PEM_read_bio_OCSP_REQUEST(bp,x,cb) (OCSP_REQUEST *)PEM_ASN1_read_bio( \ + (char *(*)())d2i_OCSP_REQUEST,PEM_STRING_OCSP_REQUEST, \ + bp,(char **)(x),cb,NULL) + +# define PEM_read_bio_OCSP_RESPONSE(bp,x,cb) (OCSP_RESPONSE *)PEM_ASN1_read_bio(\ + (char *(*)())d2i_OCSP_RESPONSE,PEM_STRING_OCSP_RESPONSE, \ + bp,(char **)(x),cb,NULL) + +# define PEM_write_bio_OCSP_REQUEST(bp,o) \ + PEM_ASN1_write_bio((int (*)())i2d_OCSP_REQUEST,PEM_STRING_OCSP_REQUEST,\ + bp,(char *)(o), NULL,NULL,0,NULL,NULL) + +# define PEM_write_bio_OCSP_RESPONSE(bp,o) \ + PEM_ASN1_write_bio((int (*)())i2d_OCSP_RESPONSE,PEM_STRING_OCSP_RESPONSE,\ + bp,(char *)(o), NULL,NULL,0,NULL,NULL) + +# define i2d_OCSP_RESPONSE_bio(bp,o) ASN1_i2d_bio_of(OCSP_RESPONSE,i2d_OCSP_RESPONSE,bp,o) + +# define i2d_OCSP_REQUEST_bio(bp,o) ASN1_i2d_bio_of(OCSP_REQUEST,i2d_OCSP_REQUEST,bp,o) + +# define ASN1_BIT_STRING_digest(data,type,md,len) \ + ASN1_item_digest(ASN1_ITEM_rptr(ASN1_BIT_STRING),type,data,md,len) + +# 
define OCSP_CERTSTATUS_dup(cs)\ + (OCSP_CERTSTATUS*)ASN1_dup((i2d_of_void *)i2d_OCSP_CERTSTATUS,\ + (d2i_of_void *)d2i_OCSP_CERTSTATUS,(char *)(cs)) + +DECLARE_ASN1_DUP_FUNCTION(OCSP_CERTID) + +OSSL_HTTP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, + const OCSP_REQUEST *req, int buf_size); +OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req); + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +typedef OSSL_HTTP_REQ_CTX OCSP_REQ_CTX; +# define OCSP_REQ_CTX_new(io, buf_size) \ + OSSL_HTTP_REQ_CTX_new(io, io, buf_size) +# define OCSP_REQ_CTX_free OSSL_HTTP_REQ_CTX_free +# define OCSP_REQ_CTX_http(rctx, op, path) \ + (OSSL_HTTP_REQ_CTX_set_expected(rctx, NULL, 1 /* asn1 */, 0, 0) && \ + OSSL_HTTP_REQ_CTX_set_request_line(rctx, strcmp(op, "POST") == 0, \ + NULL, NULL, path)) +# define OCSP_REQ_CTX_add1_header OSSL_HTTP_REQ_CTX_add1_header +# define OCSP_REQ_CTX_i2d(r, it, req) \ + OSSL_HTTP_REQ_CTX_set1_req(r, "application/ocsp-request", it, req) +# define OCSP_REQ_CTX_set1_req(r, req) \ + OCSP_REQ_CTX_i2d(r, ASN1_ITEM_rptr(OCSP_REQUEST), (ASN1_VALUE *)(req)) +# define OCSP_REQ_CTX_nbio OSSL_HTTP_REQ_CTX_nbio +# define OCSP_REQ_CTX_nbio_d2i OSSL_HTTP_REQ_CTX_nbio_d2i +# define OCSP_sendreq_nbio(p, r) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(r, (ASN1_VALUE **)(p), \ + ASN1_ITEM_rptr(OCSP_RESPONSE)) +# define OCSP_REQ_CTX_get0_mem_bio OSSL_HTTP_REQ_CTX_get0_mem_bio +# define OCSP_set_max_response_length OSSL_HTTP_REQ_CTX_set_max_response_length +# endif + +OCSP_CERTID *OCSP_cert_to_id(const EVP_MD *dgst, const X509 *subject, + const X509 *issuer); + +OCSP_CERTID *OCSP_cert_id_new(const EVP_MD *dgst, + const X509_NAME *issuerName, + const ASN1_BIT_STRING *issuerKey, + const ASN1_INTEGER *serialNumber); + +OCSP_ONEREQ *OCSP_request_add0_id(OCSP_REQUEST *req, OCSP_CERTID *cid); + +int OCSP_request_add1_nonce(OCSP_REQUEST *req, unsigned char *val, int len); +int OCSP_basic_add1_nonce(OCSP_BASICRESP *resp, unsigned char *val, int len); +int OCSP_check_nonce(OCSP_REQUEST *req, OCSP_BASICRESP *bs); +int OCSP_copy_nonce(OCSP_BASICRESP *resp, OCSP_REQUEST *req); + +int OCSP_request_set1_name(OCSP_REQUEST *req, const X509_NAME *nm); +int OCSP_request_add1_cert(OCSP_REQUEST *req, X509 *cert); + +int OCSP_request_sign(OCSP_REQUEST *req, + X509 *signer, + EVP_PKEY *key, + const EVP_MD *dgst, + STACK_OF(X509) *certs, unsigned long flags); + +int OCSP_response_status(OCSP_RESPONSE *resp); +OCSP_BASICRESP *OCSP_response_get1_basic(OCSP_RESPONSE *resp); + +const ASN1_OCTET_STRING *OCSP_resp_get0_signature(const OCSP_BASICRESP *bs); +const X509_ALGOR *OCSP_resp_get0_tbs_sigalg(const OCSP_BASICRESP *bs); +const OCSP_RESPDATA *OCSP_resp_get0_respdata(const OCSP_BASICRESP *bs); +int OCSP_resp_get0_signer(OCSP_BASICRESP *bs, X509 **signer, + STACK_OF(X509) *extra_certs); + +int OCSP_resp_count(OCSP_BASICRESP *bs); +OCSP_SINGLERESP *OCSP_resp_get0(OCSP_BASICRESP *bs, int idx); +const ASN1_GENERALIZEDTIME *OCSP_resp_get0_produced_at(const OCSP_BASICRESP* bs); +const STACK_OF(X509) *OCSP_resp_get0_certs(const OCSP_BASICRESP *bs); +int OCSP_resp_get0_id(const OCSP_BASICRESP *bs, + const ASN1_OCTET_STRING **pid, + const X509_NAME **pname); +int OCSP_resp_get1_id(const OCSP_BASICRESP *bs, + ASN1_OCTET_STRING **pid, + X509_NAME **pname); + +int OCSP_resp_find(OCSP_BASICRESP *bs, OCSP_CERTID *id, int last); +int OCSP_single_get0_status(OCSP_SINGLERESP *single, int *reason, + ASN1_GENERALIZEDTIME **revtime, + ASN1_GENERALIZEDTIME **thisupd, + ASN1_GENERALIZEDTIME **nextupd); +int OCSP_resp_find_status(OCSP_BASICRESP 
*bs, OCSP_CERTID *id, int *status, + int *reason, + ASN1_GENERALIZEDTIME **revtime, + ASN1_GENERALIZEDTIME **thisupd, + ASN1_GENERALIZEDTIME **nextupd); +int OCSP_check_validity(ASN1_GENERALIZEDTIME *thisupd, + ASN1_GENERALIZEDTIME *nextupd, long sec, long maxsec); + +int OCSP_request_verify(OCSP_REQUEST *req, STACK_OF(X509) *certs, + X509_STORE *store, unsigned long flags); + +# define OCSP_parse_url(url, host, port, path, ssl) \ + OSSL_HTTP_parse_url(url, ssl, NULL, host, port, NULL, path, NULL, NULL) + +int OCSP_id_issuer_cmp(const OCSP_CERTID *a, const OCSP_CERTID *b); +int OCSP_id_cmp(const OCSP_CERTID *a, const OCSP_CERTID *b); + +int OCSP_request_onereq_count(OCSP_REQUEST *req); +OCSP_ONEREQ *OCSP_request_onereq_get0(OCSP_REQUEST *req, int i); +OCSP_CERTID *OCSP_onereq_get0_id(OCSP_ONEREQ *one); +int OCSP_id_get0_info(ASN1_OCTET_STRING **piNameHash, ASN1_OBJECT **pmd, + ASN1_OCTET_STRING **pikeyHash, + ASN1_INTEGER **pserial, OCSP_CERTID *cid); +int OCSP_request_is_signed(OCSP_REQUEST *req); +OCSP_RESPONSE *OCSP_response_create(int status, OCSP_BASICRESP *bs); +OCSP_SINGLERESP *OCSP_basic_add1_status(OCSP_BASICRESP *rsp, + OCSP_CERTID *cid, + int status, int reason, + ASN1_TIME *revtime, + ASN1_TIME *thisupd, + ASN1_TIME *nextupd); +int OCSP_basic_add1_cert(OCSP_BASICRESP *resp, X509 *cert); +int OCSP_basic_sign(OCSP_BASICRESP *brsp, + X509 *signer, EVP_PKEY *key, const EVP_MD *dgst, + STACK_OF(X509) *certs, unsigned long flags); +int OCSP_basic_sign_ctx(OCSP_BASICRESP *brsp, + X509 *signer, EVP_MD_CTX *ctx, + STACK_OF(X509) *certs, unsigned long flags); +int OCSP_RESPID_set_by_name(OCSP_RESPID *respid, X509 *cert); +int OCSP_RESPID_set_by_key_ex(OCSP_RESPID *respid, X509 *cert, + OSSL_LIB_CTX *libctx, const char *propq); +int OCSP_RESPID_set_by_key(OCSP_RESPID *respid, X509 *cert); +int OCSP_RESPID_match_ex(OCSP_RESPID *respid, X509 *cert, OSSL_LIB_CTX *libctx, + const char *propq); +int OCSP_RESPID_match(OCSP_RESPID *respid, X509 *cert); + +X509_EXTENSION *OCSP_crlID_new(const char *url, long *n, char *tim); + +X509_EXTENSION *OCSP_accept_responses_new(char **oids); + +X509_EXTENSION *OCSP_archive_cutoff_new(char *tim); + +X509_EXTENSION *OCSP_url_svcloc_new(const X509_NAME *issuer, const char **urls); + +int OCSP_REQUEST_get_ext_count(OCSP_REQUEST *x); +int OCSP_REQUEST_get_ext_by_NID(OCSP_REQUEST *x, int nid, int lastpos); +int OCSP_REQUEST_get_ext_by_OBJ(OCSP_REQUEST *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_REQUEST_get_ext_by_critical(OCSP_REQUEST *x, int crit, int lastpos); +X509_EXTENSION *OCSP_REQUEST_get_ext(OCSP_REQUEST *x, int loc); +X509_EXTENSION *OCSP_REQUEST_delete_ext(OCSP_REQUEST *x, int loc); +void *OCSP_REQUEST_get1_ext_d2i(OCSP_REQUEST *x, int nid, int *crit, + int *idx); +int OCSP_REQUEST_add1_ext_i2d(OCSP_REQUEST *x, int nid, void *value, int crit, + unsigned long flags); +int OCSP_REQUEST_add_ext(OCSP_REQUEST *x, X509_EXTENSION *ex, int loc); + +int OCSP_ONEREQ_get_ext_count(OCSP_ONEREQ *x); +int OCSP_ONEREQ_get_ext_by_NID(OCSP_ONEREQ *x, int nid, int lastpos); +int OCSP_ONEREQ_get_ext_by_OBJ(OCSP_ONEREQ *x, const ASN1_OBJECT *obj, int lastpos); +int OCSP_ONEREQ_get_ext_by_critical(OCSP_ONEREQ *x, int crit, int lastpos); +X509_EXTENSION *OCSP_ONEREQ_get_ext(OCSP_ONEREQ *x, int loc); +X509_EXTENSION *OCSP_ONEREQ_delete_ext(OCSP_ONEREQ *x, int loc); +void *OCSP_ONEREQ_get1_ext_d2i(OCSP_ONEREQ *x, int nid, int *crit, int *idx); +int OCSP_ONEREQ_add1_ext_i2d(OCSP_ONEREQ *x, int nid, void *value, int crit, + unsigned long flags); +int 
OCSP_ONEREQ_add_ext(OCSP_ONEREQ *x, X509_EXTENSION *ex, int loc); + +int OCSP_BASICRESP_get_ext_count(OCSP_BASICRESP *x); +int OCSP_BASICRESP_get_ext_by_NID(OCSP_BASICRESP *x, int nid, int lastpos); +int OCSP_BASICRESP_get_ext_by_OBJ(OCSP_BASICRESP *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_BASICRESP_get_ext_by_critical(OCSP_BASICRESP *x, int crit, + int lastpos); +X509_EXTENSION *OCSP_BASICRESP_get_ext(OCSP_BASICRESP *x, int loc); +X509_EXTENSION *OCSP_BASICRESP_delete_ext(OCSP_BASICRESP *x, int loc); +void *OCSP_BASICRESP_get1_ext_d2i(OCSP_BASICRESP *x, int nid, int *crit, + int *idx); +int OCSP_BASICRESP_add1_ext_i2d(OCSP_BASICRESP *x, int nid, void *value, + int crit, unsigned long flags); +int OCSP_BASICRESP_add_ext(OCSP_BASICRESP *x, X509_EXTENSION *ex, int loc); + +int OCSP_SINGLERESP_get_ext_count(OCSP_SINGLERESP *x); +int OCSP_SINGLERESP_get_ext_by_NID(OCSP_SINGLERESP *x, int nid, int lastpos); +int OCSP_SINGLERESP_get_ext_by_OBJ(OCSP_SINGLERESP *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_SINGLERESP_get_ext_by_critical(OCSP_SINGLERESP *x, int crit, + int lastpos); +X509_EXTENSION *OCSP_SINGLERESP_get_ext(OCSP_SINGLERESP *x, int loc); +X509_EXTENSION *OCSP_SINGLERESP_delete_ext(OCSP_SINGLERESP *x, int loc); +void *OCSP_SINGLERESP_get1_ext_d2i(OCSP_SINGLERESP *x, int nid, int *crit, + int *idx); +int OCSP_SINGLERESP_add1_ext_i2d(OCSP_SINGLERESP *x, int nid, void *value, + int crit, unsigned long flags); +int OCSP_SINGLERESP_add_ext(OCSP_SINGLERESP *x, X509_EXTENSION *ex, int loc); +const OCSP_CERTID *OCSP_SINGLERESP_get0_id(const OCSP_SINGLERESP *x); + +DECLARE_ASN1_FUNCTIONS(OCSP_SINGLERESP) +DECLARE_ASN1_FUNCTIONS(OCSP_CERTSTATUS) +DECLARE_ASN1_FUNCTIONS(OCSP_REVOKEDINFO) +DECLARE_ASN1_FUNCTIONS(OCSP_BASICRESP) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPDATA) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPID) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPONSE) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPBYTES) +DECLARE_ASN1_FUNCTIONS(OCSP_ONEREQ) +DECLARE_ASN1_FUNCTIONS(OCSP_CERTID) +DECLARE_ASN1_FUNCTIONS(OCSP_REQUEST) +DECLARE_ASN1_FUNCTIONS(OCSP_SIGNATURE) +DECLARE_ASN1_FUNCTIONS(OCSP_REQINFO) +DECLARE_ASN1_FUNCTIONS(OCSP_CRLID) +DECLARE_ASN1_FUNCTIONS(OCSP_SERVICELOC) + +const char *OCSP_response_status_str(long s); +const char *OCSP_cert_status_str(long s); +const char *OCSP_crl_reason_str(long s); + +int OCSP_REQUEST_print(BIO *bp, OCSP_REQUEST *a, unsigned long flags); +int OCSP_RESPONSE_print(BIO *bp, OCSP_RESPONSE *o, unsigned long flags); + +int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, + X509_STORE *st, unsigned long flags); + + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_OCSP) */ +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h new file mode 100644 index 00000000000..b38d64da593 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h @@ -0,0 +1,114 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/opensslv.h.in + * + * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_OPENSSLV_H +# define OPENSSL_OPENSSLV_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * SECTION 1: VERSION DATA. These will change for each release + */ + +/* + * Base version macros + * + * These macros express version number MAJOR.MINOR.PATCH exactly + */ +# define OPENSSL_VERSION_MAJOR 3 +# define OPENSSL_VERSION_MINOR 2 +# define OPENSSL_VERSION_PATCH 1 + +/* + * Additional version information + * + * These are also part of the new version scheme, but aren't part + * of the version number itself. + */ + +/* Could be: #define OPENSSL_VERSION_PRE_RELEASE "-alpha.1" */ +# define OPENSSL_VERSION_PRE_RELEASE "" +/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+fips" */ +/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+vendor.1" */ +# define OPENSSL_VERSION_BUILD_METADATA "" + +/* + * Note: The OpenSSL Project will never define OPENSSL_VERSION_BUILD_METADATA + * to be anything but the empty string. Its use is entirely reserved for + * others + */ + +/* + * Shared library version + * + * This is strictly to express ABI version, which may or may not + * be related to the API version expressed with the macros above. + * This is defined in free form. + */ +# define OPENSSL_SHLIB_VERSION 3 + +/* + * SECTION 2: USEFUL MACROS + */ + +/* For checking general API compatibility when preprocessing */ +# define OPENSSL_VERSION_PREREQ(maj,min) \ + ((OPENSSL_VERSION_MAJOR << 16) + OPENSSL_VERSION_MINOR >= ((maj) << 16) + (min)) + +/* + * Macros to get the version in easily digested string form, both the short + * "MAJOR.MINOR.PATCH" variant (where MAJOR, MINOR and PATCH are replaced + * with the values from the corresponding OPENSSL_VERSION_ macros) and the + * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and + * OPENSSL_VERSION_BUILD_METADATA_STR appended. + */ +# define OPENSSL_VERSION_STR "3.2.1" +# define OPENSSL_FULL_VERSION_STR "3.2.1" + +/* + * SECTION 3: ADDITIONAL METADATA + * + * These strings are defined separately to allow them to be parsable. + */ +# define OPENSSL_RELEASE_DATE "30 Jan 2024" + +/* + * SECTION 4: BACKWARD COMPATIBILITY + */ + +# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" + +/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ +# ifdef OPENSSL_VERSION_PRE_RELEASE +# define _OPENSSL_VERSION_PRE_RELEASE 0x0L +# else +# define _OPENSSL_VERSION_PRE_RELEASE 0xfL +# endif +# define OPENSSL_VERSION_NUMBER \ + ( (OPENSSL_VERSION_MAJOR<<28) \ + |(OPENSSL_VERSION_MINOR<<20) \ + |(OPENSSL_VERSION_PATCH<<4) \ + |_OPENSSL_VERSION_PRE_RELEASE ) + +# ifdef __cplusplus +} +# endif + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_OPENSSLV_H +# endif + +#endif /* OPENSSL_OPENSSLV_H */ diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs12.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs12.h new file mode 100644 index 00000000000..b08b0bc214c --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs12.h @@ -0,0 +1,363 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/pkcs12.h.in + * + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_PKCS12_H +# define OPENSSL_PKCS12_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_PKCS12_H +# endif + +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define PKCS12_KEY_ID 1 +# define PKCS12_IV_ID 2 +# define PKCS12_MAC_ID 3 + +/* Default iteration count */ +# ifndef PKCS12_DEFAULT_ITER +# define PKCS12_DEFAULT_ITER PKCS5_DEFAULT_ITER +# endif + +# define PKCS12_MAC_KEY_LENGTH 20 + +/* The macro is expected to be used only internally. Kept for backwards compatibility. */ +# define PKCS12_SALT_LEN 8 + +/* It's not clear if these are actually needed... */ +# define PKCS12_key_gen PKCS12_key_gen_utf8 +# define PKCS12_add_friendlyname PKCS12_add_friendlyname_utf8 + +/* MS key usage constants */ + +# define KEY_EX 0x10 +# define KEY_SIG 0x80 + +typedef struct PKCS12_MAC_DATA_st PKCS12_MAC_DATA; + +typedef struct PKCS12_st PKCS12; + +typedef struct PKCS12_SAFEBAG_st PKCS12_SAFEBAG; + +SKM_DEFINE_STACK_OF_INTERNAL(PKCS12_SAFEBAG, PKCS12_SAFEBAG, PKCS12_SAFEBAG) +#define sk_PKCS12_SAFEBAG_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_value(sk, idx) ((PKCS12_SAFEBAG *)OPENSSL_sk_value(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk), (idx))) +#define sk_PKCS12_SAFEBAG_new(cmp) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new(ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp))) +#define sk_PKCS12_SAFEBAG_new_null() ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new_null()) +#define sk_PKCS12_SAFEBAG_new_reserve(cmp, n) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new_reserve(ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp), (n))) +#define sk_PKCS12_SAFEBAG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (n)) +#define sk_PKCS12_SAFEBAG_free(sk) OPENSSL_sk_free(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_delete(sk, i) ((PKCS12_SAFEBAG *)OPENSSL_sk_delete(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (i))) +#define sk_PKCS12_SAFEBAG_delete_ptr(sk, ptr) ((PKCS12_SAFEBAG *)OPENSSL_sk_delete_ptr(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr))) +#define sk_PKCS12_SAFEBAG_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_pop(sk) ((PKCS12_SAFEBAG *)OPENSSL_sk_pop(ossl_check_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_shift(sk) ((PKCS12_SAFEBAG *)OPENSSL_sk_shift(ossl_check_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PKCS12_SAFEBAG_sk_type(sk),ossl_check_PKCS12_SAFEBAG_freefunc_type(freefunc)) +#define sk_PKCS12_SAFEBAG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr), (idx)) +#define sk_PKCS12_SAFEBAG_set(sk, idx, ptr) ((PKCS12_SAFEBAG *)OPENSSL_sk_set(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (idx), ossl_check_PKCS12_SAFEBAG_type(ptr))) +#define sk_PKCS12_SAFEBAG_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define 
sk_PKCS12_SAFEBAG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr), pnum) +#define sk_PKCS12_SAFEBAG_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_dup(sk) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_dup(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_copyfunc_type(copyfunc), ossl_check_PKCS12_SAFEBAG_freefunc_type(freefunc))) +#define sk_PKCS12_SAFEBAG_set_cmp_func(sk, cmp) ((sk_PKCS12_SAFEBAG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp))) + + +typedef struct pkcs12_bag_st PKCS12_BAGS; + +# define PKCS12_ERROR 0 +# define PKCS12_OK 1 + +/* Compatibility macros */ + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 + +# define M_PKCS12_bag_type PKCS12_bag_type +# define M_PKCS12_cert_bag_type PKCS12_cert_bag_type +# define M_PKCS12_crl_bag_type PKCS12_cert_bag_type + +# define PKCS12_certbag2x509 PKCS12_SAFEBAG_get1_cert +# define PKCS12_certbag2scrl PKCS12_SAFEBAG_get1_crl +# define PKCS12_bag_type PKCS12_SAFEBAG_get_nid +# define PKCS12_cert_bag_type PKCS12_SAFEBAG_get_bag_nid +# define PKCS12_x5092certbag PKCS12_SAFEBAG_create_cert +# define PKCS12_x509crl2certbag PKCS12_SAFEBAG_create_crl +# define PKCS12_MAKE_KEYBAG PKCS12_SAFEBAG_create0_p8inf +# define PKCS12_MAKE_SHKEYBAG PKCS12_SAFEBAG_create_pkcs8_encrypt + +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 ASN1_TYPE *PKCS12_get_attr(const PKCS12_SAFEBAG *bag, + int attr_nid); +#endif + +ASN1_TYPE *PKCS8_get_attr(PKCS8_PRIV_KEY_INFO *p8, int attr_nid); +int PKCS12_mac_present(const PKCS12 *p12); +void PKCS12_get0_mac(const ASN1_OCTET_STRING **pmac, + const X509_ALGOR **pmacalg, + const ASN1_OCTET_STRING **psalt, + const ASN1_INTEGER **piter, + const PKCS12 *p12); + +const ASN1_TYPE *PKCS12_SAFEBAG_get0_attr(const PKCS12_SAFEBAG *bag, + int attr_nid); +const ASN1_OBJECT *PKCS12_SAFEBAG_get0_type(const PKCS12_SAFEBAG *bag); +int PKCS12_SAFEBAG_get_nid(const PKCS12_SAFEBAG *bag); +int PKCS12_SAFEBAG_get_bag_nid(const PKCS12_SAFEBAG *bag); +const ASN1_TYPE *PKCS12_SAFEBAG_get0_bag_obj(const PKCS12_SAFEBAG *bag); +const ASN1_OBJECT *PKCS12_SAFEBAG_get0_bag_type(const PKCS12_SAFEBAG *bag); + +X509 *PKCS12_SAFEBAG_get1_cert_ex(const PKCS12_SAFEBAG *bag, OSSL_LIB_CTX *libctx, const char *propq); +X509 *PKCS12_SAFEBAG_get1_cert(const PKCS12_SAFEBAG *bag); +X509_CRL *PKCS12_SAFEBAG_get1_crl_ex(const PKCS12_SAFEBAG *bag, OSSL_LIB_CTX *libctx, const char *propq); +X509_CRL *PKCS12_SAFEBAG_get1_crl(const PKCS12_SAFEBAG *bag); +const STACK_OF(PKCS12_SAFEBAG) * +PKCS12_SAFEBAG_get0_safes(const PKCS12_SAFEBAG *bag); +const PKCS8_PRIV_KEY_INFO *PKCS12_SAFEBAG_get0_p8inf(const PKCS12_SAFEBAG *bag); +const X509_SIG *PKCS12_SAFEBAG_get0_pkcs8(const PKCS12_SAFEBAG *bag); + +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_cert(X509 *x509); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_crl(X509_CRL *crl); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_secret(int type, int vtype, const unsigned char *value, int len); +PKCS12_SAFEBAG 
*PKCS12_SAFEBAG_create0_p8inf(PKCS8_PRIV_KEY_INFO *p8); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create0_pkcs8(X509_SIG *p8); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_pkcs8_encrypt(int pbe_nid, + const char *pass, + int passlen, + unsigned char *salt, + int saltlen, int iter, + PKCS8_PRIV_KEY_INFO *p8inf); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_pkcs8_encrypt_ex(int pbe_nid, + const char *pass, + int passlen, + unsigned char *salt, + int saltlen, int iter, + PKCS8_PRIV_KEY_INFO *p8inf, + OSSL_LIB_CTX *ctx, + const char *propq); + +PKCS12_SAFEBAG *PKCS12_item_pack_safebag(void *obj, const ASN1_ITEM *it, + int nid1, int nid2); +PKCS8_PRIV_KEY_INFO *PKCS8_decrypt(const X509_SIG *p8, const char *pass, + int passlen); +PKCS8_PRIV_KEY_INFO *PKCS8_decrypt_ex(const X509_SIG *p8, const char *pass, + int passlen, OSSL_LIB_CTX *ctx, + const char *propq); +PKCS8_PRIV_KEY_INFO *PKCS12_decrypt_skey(const PKCS12_SAFEBAG *bag, + const char *pass, int passlen); +PKCS8_PRIV_KEY_INFO *PKCS12_decrypt_skey_ex(const PKCS12_SAFEBAG *bag, + const char *pass, int passlen, + OSSL_LIB_CTX *ctx, + const char *propq); +X509_SIG *PKCS8_encrypt(int pbe_nid, const EVP_CIPHER *cipher, + const char *pass, int passlen, unsigned char *salt, + int saltlen, int iter, PKCS8_PRIV_KEY_INFO *p8); +X509_SIG *PKCS8_encrypt_ex(int pbe_nid, const EVP_CIPHER *cipher, + const char *pass, int passlen, unsigned char *salt, + int saltlen, int iter, PKCS8_PRIV_KEY_INFO *p8, + OSSL_LIB_CTX *ctx, const char *propq); +X509_SIG *PKCS8_set0_pbe(const char *pass, int passlen, + PKCS8_PRIV_KEY_INFO *p8inf, X509_ALGOR *pbe); +X509_SIG *PKCS8_set0_pbe_ex(const char *pass, int passlen, + PKCS8_PRIV_KEY_INFO *p8inf, X509_ALGOR *pbe, + OSSL_LIB_CTX *ctx, const char *propq); +PKCS7 *PKCS12_pack_p7data(STACK_OF(PKCS12_SAFEBAG) *sk); +STACK_OF(PKCS12_SAFEBAG) *PKCS12_unpack_p7data(PKCS7 *p7); +PKCS7 *PKCS12_pack_p7encdata(int pbe_nid, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + STACK_OF(PKCS12_SAFEBAG) *bags); +PKCS7 *PKCS12_pack_p7encdata_ex(int pbe_nid, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + STACK_OF(PKCS12_SAFEBAG) *bags, + OSSL_LIB_CTX *ctx, const char *propq); + +STACK_OF(PKCS12_SAFEBAG) *PKCS12_unpack_p7encdata(PKCS7 *p7, const char *pass, + int passlen); + +int PKCS12_pack_authsafes(PKCS12 *p12, STACK_OF(PKCS7) *safes); +STACK_OF(PKCS7) *PKCS12_unpack_authsafes(const PKCS12 *p12); + +int PKCS12_add_localkeyid(PKCS12_SAFEBAG *bag, unsigned char *name, + int namelen); +int PKCS12_add_friendlyname_asc(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_friendlyname_utf8(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_CSPName_asc(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_friendlyname_uni(PKCS12_SAFEBAG *bag, + const unsigned char *name, int namelen); +int PKCS12_add1_attr_by_NID(PKCS12_SAFEBAG *bag, int nid, int type, + const unsigned char *bytes, int len); +int PKCS12_add1_attr_by_txt(PKCS12_SAFEBAG *bag, const char *attrname, int type, + const unsigned char *bytes, int len); +int PKCS8_add_keyusage(PKCS8_PRIV_KEY_INFO *p8, int usage); +ASN1_TYPE *PKCS12_get_attr_gen(const STACK_OF(X509_ATTRIBUTE) *attrs, + int attr_nid); +char *PKCS12_get_friendlyname(PKCS12_SAFEBAG *bag); +const STACK_OF(X509_ATTRIBUTE) * +PKCS12_SAFEBAG_get0_attrs(const PKCS12_SAFEBAG *bag); +void PKCS12_SAFEBAG_set0_attrs(PKCS12_SAFEBAG *bag, STACK_OF(X509_ATTRIBUTE) *attrs); +unsigned char *PKCS12_pbe_crypt(const X509_ALGOR *algor, + const char 
*pass, int passlen, + const unsigned char *in, int inlen, + unsigned char **data, int *datalen, + int en_de); +unsigned char *PKCS12_pbe_crypt_ex(const X509_ALGOR *algor, + const char *pass, int passlen, + const unsigned char *in, int inlen, + unsigned char **data, int *datalen, + int en_de, OSSL_LIB_CTX *libctx, + const char *propq); +void *PKCS12_item_decrypt_d2i(const X509_ALGOR *algor, const ASN1_ITEM *it, + const char *pass, int passlen, + const ASN1_OCTET_STRING *oct, int zbuf); +void *PKCS12_item_decrypt_d2i_ex(const X509_ALGOR *algor, const ASN1_ITEM *it, + const char *pass, int passlen, + const ASN1_OCTET_STRING *oct, int zbuf, + OSSL_LIB_CTX *libctx, + const char *propq); +ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt(X509_ALGOR *algor, + const ASN1_ITEM *it, + const char *pass, int passlen, + void *obj, int zbuf); +ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt_ex(X509_ALGOR *algor, + const ASN1_ITEM *it, + const char *pass, int passlen, + void *obj, int zbuf, + OSSL_LIB_CTX *ctx, + const char *propq); +PKCS12 *PKCS12_init(int mode); +PKCS12 *PKCS12_init_ex(int mode, OSSL_LIB_CTX *ctx, const char *propq); + +int PKCS12_key_gen_asc(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_asc_ex(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); +int PKCS12_key_gen_uni(unsigned char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_uni_ex(unsigned char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); +int PKCS12_key_gen_utf8(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_utf8_ex(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); + +int PKCS12_PBE_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, const EVP_CIPHER *cipher, + const EVP_MD *md_type, int en_de); +int PKCS12_PBE_keyivgen_ex(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, const EVP_CIPHER *cipher, + const EVP_MD *md_type, int en_de, + OSSL_LIB_CTX *libctx, const char *propq); +int PKCS12_gen_mac(PKCS12 *p12, const char *pass, int passlen, + unsigned char *mac, unsigned int *maclen); +int PKCS12_verify_mac(PKCS12 *p12, const char *pass, int passlen); +int PKCS12_set_mac(PKCS12 *p12, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + const EVP_MD *md_type); +int PKCS12_setup_mac(PKCS12 *p12, int iter, unsigned char *salt, + int saltlen, const EVP_MD *md_type); +unsigned char *OPENSSL_asc2uni(const char *asc, int asclen, + unsigned char **uni, int *unilen); +char *OPENSSL_uni2asc(const unsigned char *uni, int unilen); +unsigned char *OPENSSL_utf82uni(const char *asc, int asclen, + unsigned char **uni, int *unilen); +char *OPENSSL_uni2utf8(const unsigned char *uni, int unilen); + +DECLARE_ASN1_FUNCTIONS(PKCS12) +DECLARE_ASN1_FUNCTIONS(PKCS12_MAC_DATA) +DECLARE_ASN1_FUNCTIONS(PKCS12_SAFEBAG) +DECLARE_ASN1_FUNCTIONS(PKCS12_BAGS) + +DECLARE_ASN1_ITEM(PKCS12_SAFEBAGS) 
+DECLARE_ASN1_ITEM(PKCS12_AUTHSAFES) + +void PKCS12_PBE_add(void); +int PKCS12_parse(PKCS12 *p12, const char *pass, EVP_PKEY **pkey, X509 **cert, + STACK_OF(X509) **ca); +typedef int PKCS12_create_cb(PKCS12_SAFEBAG *bag, void *cbarg); +PKCS12 *PKCS12_create(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype); +PKCS12 *PKCS12_create_ex(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype, + OSSL_LIB_CTX *ctx, const char *propq); +PKCS12 *PKCS12_create_ex2(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype, + OSSL_LIB_CTX *ctx, const char *propq, + PKCS12_create_cb *cb, void *cbarg); + +PKCS12_SAFEBAG *PKCS12_add_cert(STACK_OF(PKCS12_SAFEBAG) **pbags, X509 *cert); +PKCS12_SAFEBAG *PKCS12_add_key(STACK_OF(PKCS12_SAFEBAG) **pbags, + EVP_PKEY *key, int key_usage, int iter, + int key_nid, const char *pass); +PKCS12_SAFEBAG *PKCS12_add_key_ex(STACK_OF(PKCS12_SAFEBAG) **pbags, + EVP_PKEY *key, int key_usage, int iter, + int key_nid, const char *pass, + OSSL_LIB_CTX *ctx, const char *propq); + +PKCS12_SAFEBAG *PKCS12_add_secret(STACK_OF(PKCS12_SAFEBAG) **pbags, + int nid_type, const unsigned char *value, int len); +int PKCS12_add_safe(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, + int safe_nid, int iter, const char *pass); +int PKCS12_add_safe_ex(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, + int safe_nid, int iter, const char *pass, + OSSL_LIB_CTX *ctx, const char *propq); + +PKCS12 *PKCS12_add_safes(STACK_OF(PKCS7) *safes, int p7_nid); +PKCS12 *PKCS12_add_safes_ex(STACK_OF(PKCS7) *safes, int p7_nid, + OSSL_LIB_CTX *ctx, const char *propq); + +int i2d_PKCS12_bio(BIO *bp, const PKCS12 *p12); +# ifndef OPENSSL_NO_STDIO +int i2d_PKCS12_fp(FILE *fp, const PKCS12 *p12); +# endif +PKCS12 *d2i_PKCS12_bio(BIO *bp, PKCS12 **p12); +# ifndef OPENSSL_NO_STDIO +PKCS12 *d2i_PKCS12_fp(FILE *fp, PKCS12 **p12); +# endif +int PKCS12_newpass(PKCS12 *p12, const char *oldpass, const char *newpass); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs7.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs7.h new file mode 100644 index 00000000000..dc46c51118a --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs7.h @@ -0,0 +1,430 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/pkcs7.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_PKCS7_H +# define OPENSSL_PKCS7_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_PKCS7_H +# endif + +# include +# include +# include + +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + + +/*- +Encryption_ID DES-CBC +Digest_ID MD5 +Digest_Encryption_ID rsaEncryption +Key_Encryption_ID rsaEncryption +*/ + +typedef struct PKCS7_CTX_st { + OSSL_LIB_CTX *libctx; + char *propq; +} PKCS7_CTX; + +typedef struct pkcs7_issuer_and_serial_st { + X509_NAME *issuer; + ASN1_INTEGER *serial; +} PKCS7_ISSUER_AND_SERIAL; + +typedef struct pkcs7_signer_info_st { + ASN1_INTEGER *version; /* version 1 */ + PKCS7_ISSUER_AND_SERIAL *issuer_and_serial; + X509_ALGOR *digest_alg; + STACK_OF(X509_ATTRIBUTE) *auth_attr; /* [ 0 ] */ + X509_ALGOR *digest_enc_alg; /* confusing name, actually used for signing */ + ASN1_OCTET_STRING *enc_digest; /* confusing name, actually signature */ + STACK_OF(X509_ATTRIBUTE) *unauth_attr; /* [ 1 ] */ + /* The private key to sign with */ + EVP_PKEY *pkey; + const PKCS7_CTX *ctx; +} PKCS7_SIGNER_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7_SIGNER_INFO, PKCS7_SIGNER_INFO, PKCS7_SIGNER_INFO) +#define sk_PKCS7_SIGNER_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_value(sk, idx) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_value(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk), (idx))) +#define sk_PKCS7_SIGNER_INFO_new(cmp) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new(ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp))) +#define sk_PKCS7_SIGNER_INFO_new_null() ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_SIGNER_INFO_new_reserve(cmp, n) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp), (n))) +#define sk_PKCS7_SIGNER_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (n)) +#define sk_PKCS7_SIGNER_INFO_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_delete(sk, i) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_delete(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (i))) +#define sk_PKCS7_SIGNER_INFO_delete_ptr(sk, ptr) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr))) +#define sk_PKCS7_SIGNER_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_pop(sk) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_pop(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_shift(sk) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_shift(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk),ossl_check_PKCS7_SIGNER_INFO_freefunc_type(freefunc)) +#define sk_PKCS7_SIGNER_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr), (idx)) +#define sk_PKCS7_SIGNER_INFO_set(sk, idx, ptr) ((PKCS7_SIGNER_INFO 
*)OPENSSL_sk_set(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (idx), ossl_check_PKCS7_SIGNER_INFO_type(ptr))) +#define sk_PKCS7_SIGNER_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr), pnum) +#define sk_PKCS7_SIGNER_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_dup(sk) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_copyfunc_type(copyfunc), ossl_check_PKCS7_SIGNER_INFO_freefunc_type(freefunc))) +#define sk_PKCS7_SIGNER_INFO_set_cmp_func(sk, cmp) ((sk_PKCS7_SIGNER_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp))) + + +typedef struct pkcs7_recip_info_st { + ASN1_INTEGER *version; /* version 0 */ + PKCS7_ISSUER_AND_SERIAL *issuer_and_serial; + X509_ALGOR *key_enc_algor; + ASN1_OCTET_STRING *enc_key; + X509 *cert; /* get the pub-key from this */ + const PKCS7_CTX *ctx; +} PKCS7_RECIP_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7_RECIP_INFO, PKCS7_RECIP_INFO, PKCS7_RECIP_INFO) +#define sk_PKCS7_RECIP_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_value(sk, idx) ((PKCS7_RECIP_INFO *)OPENSSL_sk_value(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk), (idx))) +#define sk_PKCS7_RECIP_INFO_new(cmp) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new(ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp))) +#define sk_PKCS7_RECIP_INFO_new_null() ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_RECIP_INFO_new_reserve(cmp, n) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp), (n))) +#define sk_PKCS7_RECIP_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (n)) +#define sk_PKCS7_RECIP_INFO_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_delete(sk, i) ((PKCS7_RECIP_INFO *)OPENSSL_sk_delete(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (i))) +#define sk_PKCS7_RECIP_INFO_delete_ptr(sk, ptr) ((PKCS7_RECIP_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr))) +#define sk_PKCS7_RECIP_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_pop(sk) ((PKCS7_RECIP_INFO *)OPENSSL_sk_pop(ossl_check_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_shift(sk) ((PKCS7_RECIP_INFO *)OPENSSL_sk_shift(ossl_check_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_PKCS7_RECIP_INFO_sk_type(sk),ossl_check_PKCS7_RECIP_INFO_freefunc_type(freefunc)) +#define sk_PKCS7_RECIP_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr), (idx)) +#define sk_PKCS7_RECIP_INFO_set(sk, idx, ptr) ((PKCS7_RECIP_INFO *)OPENSSL_sk_set(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (idx), ossl_check_PKCS7_RECIP_INFO_type(ptr))) +#define sk_PKCS7_RECIP_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr), pnum) +#define sk_PKCS7_RECIP_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_dup(sk) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_copyfunc_type(copyfunc), ossl_check_PKCS7_RECIP_INFO_freefunc_type(freefunc))) +#define sk_PKCS7_RECIP_INFO_set_cmp_func(sk, cmp) ((sk_PKCS7_RECIP_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp))) + + + +typedef struct pkcs7_signed_st { + ASN1_INTEGER *version; /* version 1 */ + STACK_OF(X509_ALGOR) *md_algs; /* md used */ + STACK_OF(X509) *cert; /* [ 0 ] */ /* name should be 'certificates' */ + STACK_OF(X509_CRL) *crl; /* [ 1 ] */ /* name should be 'crls' */ + STACK_OF(PKCS7_SIGNER_INFO) *signer_info; + struct pkcs7_st *contents; +} PKCS7_SIGNED; +/* + * The above structure is very very similar to PKCS7_SIGN_ENVELOPE. 
How about + * merging the two + */ + +typedef struct pkcs7_enc_content_st { + ASN1_OBJECT *content_type; + X509_ALGOR *algorithm; + ASN1_OCTET_STRING *enc_data; /* [ 0 ] */ + const EVP_CIPHER *cipher; + const PKCS7_CTX *ctx; +} PKCS7_ENC_CONTENT; + +typedef struct pkcs7_enveloped_st { + ASN1_INTEGER *version; /* version 0 */ + STACK_OF(PKCS7_RECIP_INFO) *recipientinfo; + PKCS7_ENC_CONTENT *enc_data; +} PKCS7_ENVELOPE; + +typedef struct pkcs7_signedandenveloped_st { + ASN1_INTEGER *version; /* version 1 */ + STACK_OF(X509_ALGOR) *md_algs; /* md used */ + STACK_OF(X509) *cert; /* [ 0 ] */ /* name should be 'certificates' */ + STACK_OF(X509_CRL) *crl; /* [ 1 ] */ /* name should be 'crls' */ + STACK_OF(PKCS7_SIGNER_INFO) *signer_info; + PKCS7_ENC_CONTENT *enc_data; + STACK_OF(PKCS7_RECIP_INFO) *recipientinfo; +} PKCS7_SIGN_ENVELOPE; + +typedef struct pkcs7_digest_st { + ASN1_INTEGER *version; /* version 0 */ + X509_ALGOR *md; /* md used */ + struct pkcs7_st *contents; + ASN1_OCTET_STRING *digest; +} PKCS7_DIGEST; + +typedef struct pkcs7_encrypted_st { + ASN1_INTEGER *version; /* version 0 */ + PKCS7_ENC_CONTENT *enc_data; +} PKCS7_ENCRYPT; + +typedef struct pkcs7_st { + /* + * The following is non NULL if it contains ASN1 encoding of this + * structure + */ + unsigned char *asn1; + long length; +# define PKCS7_S_HEADER 0 +# define PKCS7_S_BODY 1 +# define PKCS7_S_TAIL 2 + int state; /* used during processing */ + int detached; + ASN1_OBJECT *type; + /* content as defined by the type */ + /* + * all encryption/message digests are applied to the 'contents', leaving + * out the 'type' field. + */ + union { + char *ptr; + /* NID_pkcs7_data */ + ASN1_OCTET_STRING *data; + /* NID_pkcs7_signed */ + PKCS7_SIGNED *sign; /* field name 'signed' would clash with C keyword */ + /* NID_pkcs7_enveloped */ + PKCS7_ENVELOPE *enveloped; + /* NID_pkcs7_signedAndEnveloped */ + PKCS7_SIGN_ENVELOPE *signed_and_enveloped; + /* NID_pkcs7_digest */ + PKCS7_DIGEST *digest; + /* NID_pkcs7_encrypted */ + PKCS7_ENCRYPT *encrypted; + /* Anything else */ + ASN1_TYPE *other; + } d; + PKCS7_CTX ctx; +} PKCS7; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7, PKCS7, PKCS7) +#define sk_PKCS7_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_sk_type(sk)) +#define sk_PKCS7_value(sk, idx) ((PKCS7 *)OPENSSL_sk_value(ossl_check_const_PKCS7_sk_type(sk), (idx))) +#define sk_PKCS7_new(cmp) ((STACK_OF(PKCS7) *)OPENSSL_sk_new(ossl_check_PKCS7_compfunc_type(cmp))) +#define sk_PKCS7_new_null() ((STACK_OF(PKCS7) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_new_reserve(cmp, n) ((STACK_OF(PKCS7) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_compfunc_type(cmp), (n))) +#define sk_PKCS7_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_sk_type(sk), (n)) +#define sk_PKCS7_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_delete(sk, i) ((PKCS7 *)OPENSSL_sk_delete(ossl_check_PKCS7_sk_type(sk), (i))) +#define sk_PKCS7_delete_ptr(sk, ptr) ((PKCS7 *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr))) +#define sk_PKCS7_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_pop(sk) ((PKCS7 *)OPENSSL_sk_pop(ossl_check_PKCS7_sk_type(sk))) +#define sk_PKCS7_shift(sk) ((PKCS7 *)OPENSSL_sk_shift(ossl_check_PKCS7_sk_type(sk))) +#define sk_PKCS7_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_PKCS7_sk_type(sk),ossl_check_PKCS7_freefunc_type(freefunc)) +#define sk_PKCS7_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr), (idx)) +#define sk_PKCS7_set(sk, idx, ptr) ((PKCS7 *)OPENSSL_sk_set(ossl_check_PKCS7_sk_type(sk), (idx), ossl_check_PKCS7_type(ptr))) +#define sk_PKCS7_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr), pnum) +#define sk_PKCS7_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_sk_type(sk)) +#define sk_PKCS7_dup(sk) ((STACK_OF(PKCS7) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_sk_type(sk))) +#define sk_PKCS7_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_sk_type(sk), ossl_check_PKCS7_copyfunc_type(copyfunc), ossl_check_PKCS7_freefunc_type(freefunc))) +#define sk_PKCS7_set_cmp_func(sk, cmp) ((sk_PKCS7_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_compfunc_type(cmp))) + + + +# define PKCS7_OP_SET_DETACHED_SIGNATURE 1 +# define PKCS7_OP_GET_DETACHED_SIGNATURE 2 + +# define PKCS7_get_signed_attributes(si) ((si)->auth_attr) +# define PKCS7_get_attributes(si) ((si)->unauth_attr) + +# define PKCS7_type_is_signed(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_signed) +# define PKCS7_type_is_encrypted(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_encrypted) +# define PKCS7_type_is_enveloped(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_enveloped) +# define PKCS7_type_is_signedAndEnveloped(a) \ + (OBJ_obj2nid((a)->type) == NID_pkcs7_signedAndEnveloped) +# define PKCS7_type_is_data(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_data) +# define PKCS7_type_is_digest(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_digest) + +# define PKCS7_set_detached(p,v) \ + PKCS7_ctrl(p,PKCS7_OP_SET_DETACHED_SIGNATURE,v,NULL) +# define PKCS7_get_detached(p) \ + PKCS7_ctrl(p,PKCS7_OP_GET_DETACHED_SIGNATURE,0,NULL) + +# define PKCS7_is_detached(p7) (PKCS7_type_is_signed(p7) && PKCS7_get_detached(p7)) + +/* S/MIME related flags */ + +# define PKCS7_TEXT 0x1 +# define PKCS7_NOCERTS 0x2 +# define PKCS7_NOSIGS 0x4 +# define PKCS7_NOCHAIN 0x8 +# define PKCS7_NOINTERN 0x10 +# define PKCS7_NOVERIFY 0x20 +# define PKCS7_DETACHED 0x40 +# define PKCS7_BINARY 0x80 +# define PKCS7_NOATTR 0x100 +# define PKCS7_NOSMIMECAP 0x200 +# define PKCS7_NOOLDMIMETYPE 0x400 +# define PKCS7_CRLFEOL 0x800 +# define PKCS7_STREAM 0x1000 +# define PKCS7_NOCRL 0x2000 +# define PKCS7_PARTIAL 0x4000 +# define PKCS7_REUSE_DIGEST 0x8000 +# define PKCS7_NO_DUAL_CONTENT 0x10000 + +/* Flags: for compatibility with older code */ + +# define SMIME_TEXT PKCS7_TEXT +# define SMIME_NOCERTS PKCS7_NOCERTS +# define SMIME_NOSIGS PKCS7_NOSIGS +# define SMIME_NOCHAIN PKCS7_NOCHAIN +# define SMIME_NOINTERN PKCS7_NOINTERN +# define SMIME_NOVERIFY PKCS7_NOVERIFY +# define SMIME_DETACHED PKCS7_DETACHED +# define SMIME_BINARY PKCS7_BINARY +# define SMIME_NOATTR PKCS7_NOATTR + +/* CRLF ASCII canonicalisation */ +# define SMIME_ASCIICRLF 0x80000 + +DECLARE_ASN1_FUNCTIONS(PKCS7_ISSUER_AND_SERIAL) + +int PKCS7_ISSUER_AND_SERIAL_digest(PKCS7_ISSUER_AND_SERIAL *data, + const EVP_MD *type, unsigned char *md, + unsigned int *len); +# ifndef OPENSSL_NO_STDIO +PKCS7 *d2i_PKCS7_fp(FILE *fp, 
PKCS7 **p7); +int i2d_PKCS7_fp(FILE *fp, const PKCS7 *p7); +# endif +DECLARE_ASN1_DUP_FUNCTION(PKCS7) +PKCS7 *d2i_PKCS7_bio(BIO *bp, PKCS7 **p7); +int i2d_PKCS7_bio(BIO *bp, const PKCS7 *p7); +int i2d_PKCS7_bio_stream(BIO *out, PKCS7 *p7, BIO *in, int flags); +int PEM_write_bio_PKCS7_stream(BIO *out, PKCS7 *p7, BIO *in, int flags); + +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGNER_INFO) +DECLARE_ASN1_FUNCTIONS(PKCS7_RECIP_INFO) +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGNED) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENC_CONTENT) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENVELOPE) +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGN_ENVELOPE) +DECLARE_ASN1_FUNCTIONS(PKCS7_DIGEST) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENCRYPT) +DECLARE_ASN1_FUNCTIONS(PKCS7) +PKCS7 *PKCS7_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +DECLARE_ASN1_ITEM(PKCS7_ATTR_SIGN) +DECLARE_ASN1_ITEM(PKCS7_ATTR_VERIFY) + +DECLARE_ASN1_NDEF_FUNCTION(PKCS7) +DECLARE_ASN1_PRINT_FUNCTION(PKCS7) + +long PKCS7_ctrl(PKCS7 *p7, int cmd, long larg, char *parg); + +int PKCS7_type_is_other(PKCS7 *p7); +int PKCS7_set_type(PKCS7 *p7, int type); +int PKCS7_set0_type_other(PKCS7 *p7, int type, ASN1_TYPE *other); +int PKCS7_set_content(PKCS7 *p7, PKCS7 *p7_data); +int PKCS7_SIGNER_INFO_set(PKCS7_SIGNER_INFO *p7i, X509 *x509, EVP_PKEY *pkey, + const EVP_MD *dgst); +int PKCS7_SIGNER_INFO_sign(PKCS7_SIGNER_INFO *si); +int PKCS7_add_signer(PKCS7 *p7, PKCS7_SIGNER_INFO *p7i); +int PKCS7_add_certificate(PKCS7 *p7, X509 *cert); +int PKCS7_add_crl(PKCS7 *p7, X509_CRL *crl); +int PKCS7_content_new(PKCS7 *p7, int nid); +int PKCS7_dataVerify(X509_STORE *cert_store, X509_STORE_CTX *ctx, + BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si); +int PKCS7_signatureVerify(BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si, + X509 *signer); + +BIO *PKCS7_dataInit(PKCS7 *p7, BIO *bio); +int PKCS7_dataFinal(PKCS7 *p7, BIO *bio); +BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert); + +PKCS7_SIGNER_INFO *PKCS7_add_signature(PKCS7 *p7, X509 *x509, + EVP_PKEY *pkey, const EVP_MD *dgst); +X509 *PKCS7_cert_from_signer_info(PKCS7 *p7, PKCS7_SIGNER_INFO *si); +int PKCS7_set_digest(PKCS7 *p7, const EVP_MD *md); +STACK_OF(PKCS7_SIGNER_INFO) *PKCS7_get_signer_info(PKCS7 *p7); + +PKCS7_RECIP_INFO *PKCS7_add_recipient(PKCS7 *p7, X509 *x509); +void PKCS7_SIGNER_INFO_get0_algs(PKCS7_SIGNER_INFO *si, EVP_PKEY **pk, + X509_ALGOR **pdig, X509_ALGOR **psig); +void PKCS7_RECIP_INFO_get0_alg(PKCS7_RECIP_INFO *ri, X509_ALGOR **penc); +int PKCS7_add_recipient_info(PKCS7 *p7, PKCS7_RECIP_INFO *ri); +int PKCS7_RECIP_INFO_set(PKCS7_RECIP_INFO *p7i, X509 *x509); +int PKCS7_set_cipher(PKCS7 *p7, const EVP_CIPHER *cipher); +int PKCS7_stream(unsigned char ***boundary, PKCS7 *p7); + +PKCS7_ISSUER_AND_SERIAL *PKCS7_get_issuer_and_serial(PKCS7 *p7, int idx); +ASN1_OCTET_STRING *PKCS7_get_octet_string(PKCS7 *p7); +ASN1_OCTET_STRING *PKCS7_digest_from_attributes(STACK_OF(X509_ATTRIBUTE) *sk); +int PKCS7_add_signed_attribute(PKCS7_SIGNER_INFO *p7si, int nid, int type, + void *data); +int PKCS7_add_attribute(PKCS7_SIGNER_INFO *p7si, int nid, int atrtype, + void *value); +ASN1_TYPE *PKCS7_get_attribute(const PKCS7_SIGNER_INFO *si, int nid); +ASN1_TYPE *PKCS7_get_signed_attribute(const PKCS7_SIGNER_INFO *si, int nid); +int PKCS7_set_signed_attributes(PKCS7_SIGNER_INFO *p7si, + STACK_OF(X509_ATTRIBUTE) *sk); +int PKCS7_set_attributes(PKCS7_SIGNER_INFO *p7si, + STACK_OF(X509_ATTRIBUTE) *sk); + +PKCS7 *PKCS7_sign(X509 *signcert, EVP_PKEY *pkey, STACK_OF(X509) *certs, + BIO *data, int flags); +PKCS7 *PKCS7_sign_ex(X509 *signcert, EVP_PKEY *pkey, 
STACK_OF(X509) *certs, + BIO *data, int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +PKCS7_SIGNER_INFO *PKCS7_sign_add_signer(PKCS7 *p7, + X509 *signcert, EVP_PKEY *pkey, + const EVP_MD *md, int flags); + +int PKCS7_final(PKCS7 *p7, BIO *data, int flags); +int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, + BIO *indata, BIO *out, int flags); +STACK_OF(X509) *PKCS7_get0_signers(PKCS7 *p7, STACK_OF(X509) *certs, + int flags); +PKCS7 *PKCS7_encrypt(STACK_OF(X509) *certs, BIO *in, const EVP_CIPHER *cipher, + int flags); +PKCS7 *PKCS7_encrypt_ex(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, int flags, + OSSL_LIB_CTX *libctx, const char *propq); +int PKCS7_decrypt(PKCS7 *p7, EVP_PKEY *pkey, X509 *cert, BIO *data, + int flags); + +int PKCS7_add_attrib_smimecap(PKCS7_SIGNER_INFO *si, + STACK_OF(X509_ALGOR) *cap); +STACK_OF(X509_ALGOR) *PKCS7_get_smimecap(PKCS7_SIGNER_INFO *si); +int PKCS7_simple_smimecap(STACK_OF(X509_ALGOR) *sk, int nid, int arg); + +int PKCS7_add_attrib_content_type(PKCS7_SIGNER_INFO *si, ASN1_OBJECT *coid); +int PKCS7_add0_attrib_signing_time(PKCS7_SIGNER_INFO *si, ASN1_TIME *t); +int PKCS7_add1_attrib_digest(PKCS7_SIGNER_INFO *si, + const unsigned char *md, int mdlen); + +int SMIME_write_PKCS7(BIO *bio, PKCS7 *p7, BIO *data, int flags); +PKCS7 *SMIME_read_PKCS7_ex(BIO *bio, BIO **bcont, PKCS7 **p7); +PKCS7 *SMIME_read_PKCS7(BIO *bio, BIO **bcont); + +BIO *BIO_new_PKCS7(BIO *out, PKCS7 *p7); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/safestack.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/safestack.h new file mode 100644 index 00000000000..0499700b562 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/safestack.h @@ -0,0 +1,297 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/safestack.h.in + * + * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_SAFESTACK_H +# define OPENSSL_SAFESTACK_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SAFESTACK_H +# endif + +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# define STACK_OF(type) struct stack_st_##type + +/* Helper macro for internal use */ +# define SKM_DEFINE_STACK_OF_INTERNAL(t1, t2, t3) \ + STACK_OF(t1); \ + typedef int (*sk_##t1##_compfunc)(const t3 * const *a, const t3 *const *b); \ + typedef void (*sk_##t1##_freefunc)(t3 *a); \ + typedef t3 * (*sk_##t1##_copyfunc)(const t3 *a); \ + static ossl_unused ossl_inline t2 *ossl_check_##t1##_type(t2 *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_##t1##_sk_type(const STACK_OF(t1) *sk) \ + { \ + return (const OPENSSL_STACK *)sk; \ + } \ + static ossl_unused ossl_inline OPENSSL_STACK *ossl_check_##t1##_sk_type(STACK_OF(t1) *sk) \ + { \ + return (OPENSSL_STACK *)sk; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_##t1##_compfunc_type(sk_##t1##_compfunc cmp) \ + { \ + return (OPENSSL_sk_compfunc)cmp; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_##t1##_copyfunc_type(sk_##t1##_copyfunc cpy) \ + { \ + return (OPENSSL_sk_copyfunc)cpy; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_##t1##_freefunc_type(sk_##t1##_freefunc fr) \ + { \ + return (OPENSSL_sk_freefunc)fr; \ + } + +# define SKM_DEFINE_STACK_OF(t1, t2, t3) \ + STACK_OF(t1); \ + typedef int (*sk_##t1##_compfunc)(const t3 * const *a, const t3 *const *b); \ + typedef void (*sk_##t1##_freefunc)(t3 *a); \ + typedef t3 * (*sk_##t1##_copyfunc)(const t3 *a); \ + static ossl_unused ossl_inline int sk_##t1##_num(const STACK_OF(t1) *sk) \ + { \ + return OPENSSL_sk_num((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_value(const STACK_OF(t1) *sk, int idx) \ + { \ + return (t2 *)OPENSSL_sk_value((const OPENSSL_STACK *)sk, idx); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new(sk_##t1##_compfunc compare) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new((OPENSSL_sk_compfunc)compare); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new_null(void) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new_null(); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new_reserve(sk_##t1##_compfunc compare, int n) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new_reserve((OPENSSL_sk_compfunc)compare, n); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_reserve(STACK_OF(t1) *sk, int n) \ + { \ + return OPENSSL_sk_reserve((OPENSSL_STACK *)sk, n); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_free(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_free((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_zero(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_zero((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_delete(STACK_OF(t1) *sk, int i) \ + { \ + return (t2 *)OPENSSL_sk_delete((OPENSSL_STACK *)sk, i); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_delete_ptr(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return (t2 *)OPENSSL_sk_delete_ptr((OPENSSL_STACK *)sk, \ + (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_push(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_push((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + 
static ossl_unused ossl_inline int sk_##t1##_unshift(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_unshift((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_pop(STACK_OF(t1) *sk) \ + { \ + return (t2 *)OPENSSL_sk_pop((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_shift(STACK_OF(t1) *sk) \ + { \ + return (t2 *)OPENSSL_sk_shift((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_pop_free(STACK_OF(t1) *sk, sk_##t1##_freefunc freefunc) \ + { \ + OPENSSL_sk_pop_free((OPENSSL_STACK *)sk, (OPENSSL_sk_freefunc)freefunc); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_insert(STACK_OF(t1) *sk, t2 *ptr, int idx) \ + { \ + return OPENSSL_sk_insert((OPENSSL_STACK *)sk, (const void *)ptr, idx); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_set(STACK_OF(t1) *sk, int idx, t2 *ptr) \ + { \ + return (t2 *)OPENSSL_sk_set((OPENSSL_STACK *)sk, idx, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_find((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find_ex(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_find_ex((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find_all(STACK_OF(t1) *sk, t2 *ptr, int *pnum) \ + { \ + return OPENSSL_sk_find_all((OPENSSL_STACK *)sk, (const void *)ptr, pnum); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_sort(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_sort((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_is_sorted(const STACK_OF(t1) *sk) \ + { \ + return OPENSSL_sk_is_sorted((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) * sk_##t1##_dup(const STACK_OF(t1) *sk) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_dup((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_deep_copy(const STACK_OF(t1) *sk, \ + sk_##t1##_copyfunc copyfunc, \ + sk_##t1##_freefunc freefunc) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_deep_copy((const OPENSSL_STACK *)sk, \ + (OPENSSL_sk_copyfunc)copyfunc, \ + (OPENSSL_sk_freefunc)freefunc); \ + } \ + static ossl_unused ossl_inline sk_##t1##_compfunc sk_##t1##_set_cmp_func(STACK_OF(t1) *sk, sk_##t1##_compfunc compare) \ + { \ + return (sk_##t1##_compfunc)OPENSSL_sk_set_cmp_func((OPENSSL_STACK *)sk, (OPENSSL_sk_compfunc)compare); \ + } + +# define DEFINE_STACK_OF(t) SKM_DEFINE_STACK_OF(t, t, t) +# define DEFINE_STACK_OF_CONST(t) SKM_DEFINE_STACK_OF(t, const t, t) +# define DEFINE_SPECIAL_STACK_OF(t1, t2) SKM_DEFINE_STACK_OF(t1, t2, t2) +# define DEFINE_SPECIAL_STACK_OF_CONST(t1, t2) \ + SKM_DEFINE_STACK_OF(t1, const t2, t2) + +/*- + * Strings are special: normally an lhash entry will point to a single + * (somewhat) mutable object. In the case of strings: + * + * a) Instead of a single char, there is an array of chars, NUL-terminated. + * b) The string may have be immutable. + * + * So, they need their own declarations. Especially important for + * type-checking tools, such as Deputy. + * + * In practice, however, it appears to be hard to have a const + * string. For now, I'm settling for dealing with the fact it is a + * string at all. 
+ */ +typedef char *OPENSSL_STRING; +typedef const char *OPENSSL_CSTRING; + +/*- + * Confusingly, LHASH_OF(STRING) deals with char ** throughout, but + * STACK_OF(STRING) is really more like STACK_OF(char), only, as mentioned + * above, instead of a single char each entry is a NUL-terminated array of + * chars. So, we have to implement STRING specially for STACK_OF. This is + * dealt with in the autogenerated macros below. + */ +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_STRING, char, char) +#define sk_OPENSSL_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_value(sk, idx) ((char *)OPENSSL_sk_value(ossl_check_const_OPENSSL_STRING_sk_type(sk), (idx))) +#define sk_OPENSSL_STRING_new(cmp) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new(ossl_check_OPENSSL_STRING_compfunc_type(cmp))) +#define sk_OPENSSL_STRING_new_null() ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_STRING_new_reserve(cmp, n) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_STRING_compfunc_type(cmp), (n))) +#define sk_OPENSSL_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_STRING_sk_type(sk), (n)) +#define sk_OPENSSL_STRING_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_delete(sk, i) ((char *)OPENSSL_sk_delete(ossl_check_OPENSSL_STRING_sk_type(sk), (i))) +#define sk_OPENSSL_STRING_delete_ptr(sk, ptr) ((char *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr))) +#define sk_OPENSSL_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_pop(sk) ((char *)OPENSSL_sk_pop(ossl_check_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_shift(sk) ((char *)OPENSSL_sk_shift(ossl_check_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_STRING_sk_type(sk),ossl_check_OPENSSL_STRING_freefunc_type(freefunc)) +#define sk_OPENSSL_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr), (idx)) +#define sk_OPENSSL_STRING_set(sk, idx, ptr) ((char *)OPENSSL_sk_set(ossl_check_OPENSSL_STRING_sk_type(sk), (idx), ossl_check_OPENSSL_STRING_type(ptr))) +#define sk_OPENSSL_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr), pnum) +#define sk_OPENSSL_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_dup(sk) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_copyfunc_type(copyfunc), 
ossl_check_OPENSSL_STRING_freefunc_type(freefunc))) +#define sk_OPENSSL_STRING_set_cmp_func(sk, cmp) ((sk_OPENSSL_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_CSTRING, const char, char) +#define sk_OPENSSL_CSTRING_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_value(sk, idx) ((const char *)OPENSSL_sk_value(ossl_check_const_OPENSSL_CSTRING_sk_type(sk), (idx))) +#define sk_OPENSSL_CSTRING_new(cmp) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new(ossl_check_OPENSSL_CSTRING_compfunc_type(cmp))) +#define sk_OPENSSL_CSTRING_new_null() ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_CSTRING_new_reserve(cmp, n) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_CSTRING_compfunc_type(cmp), (n))) +#define sk_OPENSSL_CSTRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_CSTRING_sk_type(sk), (n)) +#define sk_OPENSSL_CSTRING_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_delete(sk, i) ((const char *)OPENSSL_sk_delete(ossl_check_OPENSSL_CSTRING_sk_type(sk), (i))) +#define sk_OPENSSL_CSTRING_delete_ptr(sk, ptr) ((const char *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr))) +#define sk_OPENSSL_CSTRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_pop(sk) ((const char *)OPENSSL_sk_pop(ossl_check_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_shift(sk) ((const char *)OPENSSL_sk_shift(ossl_check_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_CSTRING_sk_type(sk),ossl_check_OPENSSL_CSTRING_freefunc_type(freefunc)) +#define sk_OPENSSL_CSTRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr), (idx)) +#define sk_OPENSSL_CSTRING_set(sk, idx, ptr) ((const char *)OPENSSL_sk_set(ossl_check_OPENSSL_CSTRING_sk_type(sk), (idx), ossl_check_OPENSSL_CSTRING_type(ptr))) +#define sk_OPENSSL_CSTRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr), pnum) +#define sk_OPENSSL_CSTRING_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_dup(sk) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_copyfunc_type(copyfunc), ossl_check_OPENSSL_CSTRING_freefunc_type(freefunc))) +#define sk_OPENSSL_CSTRING_set_cmp_func(sk, cmp) 
((sk_OPENSSL_CSTRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_compfunc_type(cmp))) + + +#if !defined(OPENSSL_NO_DEPRECATED_3_0) +/* + * This is not used by OpenSSL. A block of bytes, NOT nul-terminated. + * These should also be distinguished from "normal" stacks. + */ +typedef void *OPENSSL_BLOCK; +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_BLOCK, void, void) +#define sk_OPENSSL_BLOCK_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_value(sk, idx) ((void *)OPENSSL_sk_value(ossl_check_const_OPENSSL_BLOCK_sk_type(sk), (idx))) +#define sk_OPENSSL_BLOCK_new(cmp) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new(ossl_check_OPENSSL_BLOCK_compfunc_type(cmp))) +#define sk_OPENSSL_BLOCK_new_null() ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_BLOCK_new_reserve(cmp, n) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_BLOCK_compfunc_type(cmp), (n))) +#define sk_OPENSSL_BLOCK_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_BLOCK_sk_type(sk), (n)) +#define sk_OPENSSL_BLOCK_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_delete(sk, i) ((void *)OPENSSL_sk_delete(ossl_check_OPENSSL_BLOCK_sk_type(sk), (i))) +#define sk_OPENSSL_BLOCK_delete_ptr(sk, ptr) ((void *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr))) +#define sk_OPENSSL_BLOCK_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_pop(sk) ((void *)OPENSSL_sk_pop(ossl_check_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_shift(sk) ((void *)OPENSSL_sk_shift(ossl_check_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_BLOCK_sk_type(sk),ossl_check_OPENSSL_BLOCK_freefunc_type(freefunc)) +#define sk_OPENSSL_BLOCK_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr), (idx)) +#define sk_OPENSSL_BLOCK_set(sk, idx, ptr) ((void *)OPENSSL_sk_set(ossl_check_OPENSSL_BLOCK_sk_type(sk), (idx), ossl_check_OPENSSL_BLOCK_type(ptr))) +#define sk_OPENSSL_BLOCK_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr), pnum) +#define sk_OPENSSL_BLOCK_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_dup(sk) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_copyfunc_type(copyfunc), ossl_check_OPENSSL_BLOCK_freefunc_type(freefunc))) +#define sk_OPENSSL_BLOCK_set_cmp_func(sk, cmp) 
((sk_OPENSSL_BLOCK_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_compfunc_type(cmp))) + +#endif + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/srp.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/srp.h new file mode 100644 index 00000000000..a48766c6ce8 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/srp.h @@ -0,0 +1,285 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/srp.h.in + * + * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2004, EdelKey Project. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * Originally written by Christophe Renou and Peter Sylvester, + * for the EdelKey project. + */ + + + +#ifndef OPENSSL_SRP_H +# define OPENSSL_SRP_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SRP_H +# endif + +#include + +#ifndef OPENSSL_NO_SRP +# include +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 + +typedef struct SRP_gN_cache_st { + char *b64_bn; + BIGNUM *bn; +} SRP_gN_cache; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_gN_cache, SRP_gN_cache, SRP_gN_cache) +#define sk_SRP_gN_cache_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_value(sk, idx) ((SRP_gN_cache *)OPENSSL_sk_value(ossl_check_const_SRP_gN_cache_sk_type(sk), (idx))) +#define sk_SRP_gN_cache_new(cmp) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new(ossl_check_SRP_gN_cache_compfunc_type(cmp))) +#define sk_SRP_gN_cache_new_null() ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new_null()) +#define sk_SRP_gN_cache_new_reserve(cmp, n) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new_reserve(ossl_check_SRP_gN_cache_compfunc_type(cmp), (n))) +#define sk_SRP_gN_cache_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_gN_cache_sk_type(sk), (n)) +#define sk_SRP_gN_cache_free(sk) OPENSSL_sk_free(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_delete(sk, i) ((SRP_gN_cache *)OPENSSL_sk_delete(ossl_check_SRP_gN_cache_sk_type(sk), (i))) +#define sk_SRP_gN_cache_delete_ptr(sk, ptr) ((SRP_gN_cache *)OPENSSL_sk_delete_ptr(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr))) +#define sk_SRP_gN_cache_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_pop(sk) ((SRP_gN_cache *)OPENSSL_sk_pop(ossl_check_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_shift(sk) ((SRP_gN_cache *)OPENSSL_sk_shift(ossl_check_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_gN_cache_sk_type(sk),ossl_check_SRP_gN_cache_freefunc_type(freefunc)) +#define sk_SRP_gN_cache_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr), (idx)) +#define sk_SRP_gN_cache_set(sk, idx, ptr) ((SRP_gN_cache *)OPENSSL_sk_set(ossl_check_SRP_gN_cache_sk_type(sk), (idx), 
ossl_check_SRP_gN_cache_type(ptr))) +#define sk_SRP_gN_cache_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr), pnum) +#define sk_SRP_gN_cache_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_dup(sk) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_dup(ossl_check_const_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_copyfunc_type(copyfunc), ossl_check_SRP_gN_cache_freefunc_type(freefunc))) +#define sk_SRP_gN_cache_set_cmp_func(sk, cmp) ((sk_SRP_gN_cache_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_compfunc_type(cmp))) + + + +typedef struct SRP_user_pwd_st { + /* Owned by us. */ + char *id; + BIGNUM *s; + BIGNUM *v; + /* Not owned by us. */ + const BIGNUM *g; + const BIGNUM *N; + /* Owned by us. */ + char *info; +} SRP_user_pwd; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_user_pwd, SRP_user_pwd, SRP_user_pwd) +#define sk_SRP_user_pwd_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_value(sk, idx) ((SRP_user_pwd *)OPENSSL_sk_value(ossl_check_const_SRP_user_pwd_sk_type(sk), (idx))) +#define sk_SRP_user_pwd_new(cmp) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new(ossl_check_SRP_user_pwd_compfunc_type(cmp))) +#define sk_SRP_user_pwd_new_null() ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new_null()) +#define sk_SRP_user_pwd_new_reserve(cmp, n) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new_reserve(ossl_check_SRP_user_pwd_compfunc_type(cmp), (n))) +#define sk_SRP_user_pwd_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_user_pwd_sk_type(sk), (n)) +#define sk_SRP_user_pwd_free(sk) OPENSSL_sk_free(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_delete(sk, i) ((SRP_user_pwd *)OPENSSL_sk_delete(ossl_check_SRP_user_pwd_sk_type(sk), (i))) +#define sk_SRP_user_pwd_delete_ptr(sk, ptr) ((SRP_user_pwd *)OPENSSL_sk_delete_ptr(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr))) +#define sk_SRP_user_pwd_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_pop(sk) ((SRP_user_pwd *)OPENSSL_sk_pop(ossl_check_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_shift(sk) ((SRP_user_pwd *)OPENSSL_sk_shift(ossl_check_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_user_pwd_sk_type(sk),ossl_check_SRP_user_pwd_freefunc_type(freefunc)) +#define sk_SRP_user_pwd_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr), (idx)) +#define sk_SRP_user_pwd_set(sk, idx, ptr) ((SRP_user_pwd *)OPENSSL_sk_set(ossl_check_SRP_user_pwd_sk_type(sk), (idx), ossl_check_SRP_user_pwd_type(ptr))) +#define sk_SRP_user_pwd_find(sk, ptr) 
OPENSSL_sk_find(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr), pnum) +#define sk_SRP_user_pwd_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_dup(sk) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_dup(ossl_check_const_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_copyfunc_type(copyfunc), ossl_check_SRP_user_pwd_freefunc_type(freefunc))) +#define sk_SRP_user_pwd_set_cmp_func(sk, cmp) ((sk_SRP_user_pwd_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_compfunc_type(cmp))) + + +OSSL_DEPRECATEDIN_3_0 +SRP_user_pwd *SRP_user_pwd_new(void); +OSSL_DEPRECATEDIN_3_0 +void SRP_user_pwd_free(SRP_user_pwd *user_pwd); + +OSSL_DEPRECATEDIN_3_0 +void SRP_user_pwd_set_gN(SRP_user_pwd *user_pwd, const BIGNUM *g, + const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +int SRP_user_pwd_set1_ids(SRP_user_pwd *user_pwd, const char *id, + const char *info); +OSSL_DEPRECATEDIN_3_0 +int SRP_user_pwd_set0_sv(SRP_user_pwd *user_pwd, BIGNUM *s, BIGNUM *v); + +typedef struct SRP_VBASE_st { + STACK_OF(SRP_user_pwd) *users_pwd; + STACK_OF(SRP_gN_cache) *gN_cache; +/* to simulate a user */ + char *seed_key; + const BIGNUM *default_g; + const BIGNUM *default_N; +} SRP_VBASE; + +/* + * Internal structure storing N and g pair + */ +typedef struct SRP_gN_st { + char *id; + const BIGNUM *g; + const BIGNUM *N; +} SRP_gN; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_gN, SRP_gN, SRP_gN) +#define sk_SRP_gN_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_value(sk, idx) ((SRP_gN *)OPENSSL_sk_value(ossl_check_const_SRP_gN_sk_type(sk), (idx))) +#define sk_SRP_gN_new(cmp) ((STACK_OF(SRP_gN) *)OPENSSL_sk_new(ossl_check_SRP_gN_compfunc_type(cmp))) +#define sk_SRP_gN_new_null() ((STACK_OF(SRP_gN) *)OPENSSL_sk_new_null()) +#define sk_SRP_gN_new_reserve(cmp, n) ((STACK_OF(SRP_gN) *)OPENSSL_sk_new_reserve(ossl_check_SRP_gN_compfunc_type(cmp), (n))) +#define sk_SRP_gN_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_gN_sk_type(sk), (n)) +#define sk_SRP_gN_free(sk) OPENSSL_sk_free(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_delete(sk, i) ((SRP_gN *)OPENSSL_sk_delete(ossl_check_SRP_gN_sk_type(sk), (i))) +#define sk_SRP_gN_delete_ptr(sk, ptr) ((SRP_gN *)OPENSSL_sk_delete_ptr(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr))) +#define sk_SRP_gN_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_pop(sk) ((SRP_gN *)OPENSSL_sk_pop(ossl_check_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_shift(sk) ((SRP_gN *)OPENSSL_sk_shift(ossl_check_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_gN_sk_type(sk),ossl_check_SRP_gN_freefunc_type(freefunc)) +#define sk_SRP_gN_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr), (idx)) +#define sk_SRP_gN_set(sk, idx, ptr) ((SRP_gN *)OPENSSL_sk_set(ossl_check_SRP_gN_sk_type(sk), (idx), ossl_check_SRP_gN_type(ptr))) +#define sk_SRP_gN_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr), pnum) +#define sk_SRP_gN_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_dup(sk) ((STACK_OF(SRP_gN) *)OPENSSL_sk_dup(ossl_check_const_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_gN) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_gN_sk_type(sk), ossl_check_SRP_gN_copyfunc_type(copyfunc), ossl_check_SRP_gN_freefunc_type(freefunc))) +#define sk_SRP_gN_set_cmp_func(sk, cmp) ((sk_SRP_gN_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_compfunc_type(cmp))) + + + +OSSL_DEPRECATEDIN_3_0 +SRP_VBASE *SRP_VBASE_new(char *seed_key); +OSSL_DEPRECATEDIN_3_0 +void SRP_VBASE_free(SRP_VBASE *vb); +OSSL_DEPRECATEDIN_3_0 +int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file); + +OSSL_DEPRECATEDIN_3_0 +int SRP_VBASE_add0_user(SRP_VBASE *vb, SRP_user_pwd *user_pwd); + +/* NOTE: unlike in SRP_VBASE_get_by_user, caller owns the returned pointer.*/ +OSSL_DEPRECATEDIN_3_0 +SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username); + +OSSL_DEPRECATEDIN_3_0 +char *SRP_create_verifier_ex(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +char *SRP_create_verifier(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g); +OSSL_DEPRECATEDIN_3_0 +int SRP_create_verifier_BN_ex(const char *user, const char *pass, BIGNUM **salt, + BIGNUM **verifier, const BIGNUM *N, + const BIGNUM *g, OSSL_LIB_CTX *libctx, + const char *propq); +OSSL_DEPRECATEDIN_3_0 +int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, + BIGNUM **verifier, const BIGNUM *N, + const BIGNUM *g); + +# define SRP_NO_ERROR 0 +# define SRP_ERR_VBASE_INCOMPLETE_FILE 1 +# define SRP_ERR_VBASE_BN_LIB 2 +# define SRP_ERR_OPEN_FILE 3 +# define SRP_ERR_MEMORY 4 + +# define DB_srptype 0 +# define DB_srpverifier 1 +# define DB_srpsalt 2 +# define DB_srpid 3 +# define DB_srpgN 4 +# define DB_srpinfo 5 +# undef DB_NUMBER +# define DB_NUMBER 6 + +# define DB_SRP_INDEX 'I' +# define DB_SRP_VALID 'V' +# define DB_SRP_REVOKED 'R' +# define DB_SRP_MODIF 'v' + +/* see srp.c */ +OSSL_DEPRECATEDIN_3_0 +char *SRP_check_known_gN_param(const BIGNUM *g, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +SRP_gN *SRP_get_default_gN(const char *id); + +/* server side .... 
*/ +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_server_key(const BIGNUM *A, const BIGNUM *v, const BIGNUM *u, + const BIGNUM *b, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_B_ex(const BIGNUM *b, const BIGNUM *N, const BIGNUM *g, + const BIGNUM *v, OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_B(const BIGNUM *b, const BIGNUM *N, const BIGNUM *g, + const BIGNUM *v); + +OSSL_DEPRECATEDIN_3_0 +int SRP_Verify_A_mod_N(const BIGNUM *A, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_u_ex(const BIGNUM *A, const BIGNUM *B, const BIGNUM *N, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_u(const BIGNUM *A, const BIGNUM *B, const BIGNUM *N); + +/* client side .... */ + +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_x_ex(const BIGNUM *s, const char *user, const char *pass, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_x(const BIGNUM *s, const char *user, const char *pass); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_A(const BIGNUM *a, const BIGNUM *N, const BIGNUM *g); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_client_key_ex(const BIGNUM *N, const BIGNUM *B, const BIGNUM *g, + const BIGNUM *x, const BIGNUM *a, const BIGNUM *u, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_client_key(const BIGNUM *N, const BIGNUM *B, const BIGNUM *g, + const BIGNUM *x, const BIGNUM *a, const BIGNUM *u); +OSSL_DEPRECATEDIN_3_0 +int SRP_Verify_B_mod_N(const BIGNUM *B, const BIGNUM *N); + +# define SRP_MINIMAL_N 1024 + +# endif /* OPENSSL_NO_DEPRECATED_3_0 */ + +/* This method ignores the configured seed and fails for an unknown user. */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username); +# endif + +# ifdef __cplusplus +} +# endif +# endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ssl.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ssl.h new file mode 100644 index 00000000000..2b43485f725 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ssl.h @@ -0,0 +1,2765 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ssl.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * Copyright 2005 Nokia. All rights reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_SSL_H +# define OPENSSL_SSL_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SSL_H +# endif + +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# include +# include +# endif +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* OpenSSL version number for ASN.1 encoding of the session information */ +/*- + * Version 0 - initial version + * Version 1 - added the optional peer certificate + */ +# define SSL_SESSION_ASN1_VERSION 0x0001 + +# define SSL_MAX_SSL_SESSION_ID_LENGTH 32 +# define SSL_MAX_SID_CTX_LENGTH 32 + +# define SSL_MIN_RSA_MODULUS_LENGTH_IN_BYTES (512/8) +# define SSL_MAX_KEY_ARG_LENGTH 8 +/* SSL_MAX_MASTER_KEY_LENGTH is defined in prov_ssl.h */ + +/* The maximum number of encrypt/decrypt pipelines we can support */ +# define SSL_MAX_PIPELINES 32 + +/* text strings for the ciphers */ + +/* These are used to specify which ciphers to use and not to use */ + +# define SSL_TXT_LOW "LOW" +# define SSL_TXT_MEDIUM "MEDIUM" +# define SSL_TXT_HIGH "HIGH" +# define SSL_TXT_FIPS "FIPS" + +# define SSL_TXT_aNULL "aNULL" +# define SSL_TXT_eNULL "eNULL" +# define SSL_TXT_NULL "NULL" + +# define SSL_TXT_kRSA "kRSA" +# define SSL_TXT_kDHr "kDHr"/* this cipher class has been removed */ +# define SSL_TXT_kDHd "kDHd"/* this cipher class has been removed */ +# define SSL_TXT_kDH "kDH"/* this cipher class has been removed */ +# define SSL_TXT_kEDH "kEDH"/* alias for kDHE */ +# define SSL_TXT_kDHE "kDHE" +# define SSL_TXT_kECDHr "kECDHr"/* this cipher class has been removed */ +# define SSL_TXT_kECDHe "kECDHe"/* this cipher class has been removed */ +# define SSL_TXT_kECDH "kECDH"/* this cipher class has been removed */ +# define SSL_TXT_kEECDH "kEECDH"/* alias for kECDHE */ +# define SSL_TXT_kECDHE "kECDHE" +# define SSL_TXT_kPSK "kPSK" +# define SSL_TXT_kRSAPSK "kRSAPSK" +# define SSL_TXT_kECDHEPSK "kECDHEPSK" +# define SSL_TXT_kDHEPSK "kDHEPSK" +# define SSL_TXT_kGOST "kGOST" +# define SSL_TXT_kGOST18 "kGOST18" +# define SSL_TXT_kSRP "kSRP" + +# define SSL_TXT_aRSA "aRSA" +# define SSL_TXT_aDSS "aDSS" +# define SSL_TXT_aDH "aDH"/* this cipher class has been removed */ +# define SSL_TXT_aECDH "aECDH"/* this cipher class has been removed */ +# define SSL_TXT_aECDSA "aECDSA" +# define SSL_TXT_aPSK "aPSK" +# define SSL_TXT_aGOST94 "aGOST94" +# define SSL_TXT_aGOST01 "aGOST01" +# define SSL_TXT_aGOST12 "aGOST12" +# define SSL_TXT_aGOST "aGOST" +# define SSL_TXT_aSRP "aSRP" + +# define SSL_TXT_DSS "DSS" +# define SSL_TXT_DH "DH" +# define SSL_TXT_DHE "DHE"/* same as "kDHE:-ADH" */ +# define SSL_TXT_EDH "EDH"/* alias for DHE */ +# define SSL_TXT_ADH "ADH" +# define SSL_TXT_RSA "RSA" +# define SSL_TXT_ECDH "ECDH" +# define SSL_TXT_EECDH "EECDH"/* alias for ECDHE" */ +# define SSL_TXT_ECDHE "ECDHE"/* same as "kECDHE:-AECDH" */ +# define SSL_TXT_AECDH "AECDH" +# define SSL_TXT_ECDSA "ECDSA" +# define SSL_TXT_PSK "PSK" +# define SSL_TXT_SRP "SRP" + +# define SSL_TXT_DES "DES" +# define SSL_TXT_3DES "3DES" +# define SSL_TXT_RC4 "RC4" +# define SSL_TXT_RC2 "RC2" +# define SSL_TXT_IDEA "IDEA" +# define SSL_TXT_SEED "SEED" +# define SSL_TXT_AES128 "AES128" +# define SSL_TXT_AES256 "AES256" +# define SSL_TXT_AES "AES" +# define 
SSL_TXT_AES_GCM "AESGCM" +# define SSL_TXT_AES_CCM "AESCCM" +# define SSL_TXT_AES_CCM_8 "AESCCM8" +# define SSL_TXT_CAMELLIA128 "CAMELLIA128" +# define SSL_TXT_CAMELLIA256 "CAMELLIA256" +# define SSL_TXT_CAMELLIA "CAMELLIA" +# define SSL_TXT_CHACHA20 "CHACHA20" +# define SSL_TXT_GOST "GOST89" +# define SSL_TXT_ARIA "ARIA" +# define SSL_TXT_ARIA_GCM "ARIAGCM" +# define SSL_TXT_ARIA128 "ARIA128" +# define SSL_TXT_ARIA256 "ARIA256" +# define SSL_TXT_GOST2012_GOST8912_GOST8912 "GOST2012-GOST8912-GOST8912" +# define SSL_TXT_CBC "CBC" + +# define SSL_TXT_MD5 "MD5" +# define SSL_TXT_SHA1 "SHA1" +# define SSL_TXT_SHA "SHA"/* same as "SHA1" */ +# define SSL_TXT_GOST94 "GOST94" +# define SSL_TXT_GOST89MAC "GOST89MAC" +# define SSL_TXT_GOST12 "GOST12" +# define SSL_TXT_GOST89MAC12 "GOST89MAC12" +# define SSL_TXT_SHA256 "SHA256" +# define SSL_TXT_SHA384 "SHA384" + +# define SSL_TXT_SSLV3 "SSLv3" +# define SSL_TXT_TLSV1 "TLSv1" +# define SSL_TXT_TLSV1_1 "TLSv1.1" +# define SSL_TXT_TLSV1_2 "TLSv1.2" + +# define SSL_TXT_ALL "ALL" + +/*- + * COMPLEMENTOF* definitions. These identifiers are used to (de-select) + * ciphers normally not being used. + * Example: "RC4" will activate all ciphers using RC4 including ciphers + * without authentication, which would normally disabled by DEFAULT (due + * the "!ADH" being part of default). Therefore "RC4:!COMPLEMENTOFDEFAULT" + * will make sure that it is also disabled in the specific selection. + * COMPLEMENTOF* identifiers are portable between version, as adjustments + * to the default cipher setup will also be included here. + * + * COMPLEMENTOFDEFAULT does not experience the same special treatment that + * DEFAULT gets, as only selection is being done and no sorting as needed + * for DEFAULT. + */ +# define SSL_TXT_CMPALL "COMPLEMENTOFALL" +# define SSL_TXT_CMPDEF "COMPLEMENTOFDEFAULT" + +/* + * The following cipher list is used by default. It also is substituted when + * an application-defined cipher list string starts with 'DEFAULT'. + * This applies to ciphersuites for TLSv1.2 and below. + * DEPRECATED IN 3.0.0, in favor of OSSL_default_cipher_list() + * Update both macro and function simultaneously + */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_DEFAULT_CIPHER_LIST "ALL:!COMPLEMENTOFDEFAULT:!eNULL" +/* + * This is the default set of TLSv1.3 ciphersuites + * DEPRECATED IN 3.0.0, in favor of OSSL_default_ciphersuites() + * Update both macro and function simultaneously + */ +# define TLS_DEFAULT_CIPHERSUITES "TLS_AES_256_GCM_SHA384:" \ + "TLS_CHACHA20_POLY1305_SHA256:" \ + "TLS_AES_128_GCM_SHA256" +# endif +/* + * As of OpenSSL 1.0.0, ssl_create_cipher_list() in ssl/ssl_ciph.c always + * starts with a reasonable order, and all we have to do for DEFAULT is + * throwing out anonymous and unencrypted ciphersuites! (The latter are not + * actually enabled by ALL, but "ALL:RSA" would enable some of them.) + */ + +/* Used in SSL_set_shutdown()/SSL_get_shutdown(); */ +# define SSL_SENT_SHUTDOWN 1 +# define SSL_RECEIVED_SHUTDOWN 2 + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define SSL_FILETYPE_ASN1 X509_FILETYPE_ASN1 +# define SSL_FILETYPE_PEM X509_FILETYPE_PEM + +/* + * This is needed to stop compilers complaining about the 'struct ssl_st *' + * function parameters used to prototype callbacks in SSL_CTX. 
+ */ +typedef struct ssl_st *ssl_crock_st; +typedef struct tls_session_ticket_ext_st TLS_SESSION_TICKET_EXT; +typedef struct ssl_method_st SSL_METHOD; +typedef struct ssl_cipher_st SSL_CIPHER; +typedef struct ssl_session_st SSL_SESSION; +typedef struct tls_sigalgs_st TLS_SIGALGS; +typedef struct ssl_conf_ctx_st SSL_CONF_CTX; +typedef struct ssl_comp_st SSL_COMP; + +STACK_OF(SSL_CIPHER); +STACK_OF(SSL_COMP); + +/* SRTP protection profiles for use with the use_srtp extension (RFC 5764)*/ +typedef struct srtp_protection_profile_st { + const char *name; + unsigned long id; +} SRTP_PROTECTION_PROFILE; +SKM_DEFINE_STACK_OF_INTERNAL(SRTP_PROTECTION_PROFILE, SRTP_PROTECTION_PROFILE, SRTP_PROTECTION_PROFILE) +#define sk_SRTP_PROTECTION_PROFILE_num(sk) OPENSSL_sk_num(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_value(sk, idx) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_value(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk), (idx))) +#define sk_SRTP_PROTECTION_PROFILE_new(cmp) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new(ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp))) +#define sk_SRTP_PROTECTION_PROFILE_new_null() ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new_null()) +#define sk_SRTP_PROTECTION_PROFILE_new_reserve(cmp, n) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new_reserve(ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp), (n))) +#define sk_SRTP_PROTECTION_PROFILE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (n)) +#define sk_SRTP_PROTECTION_PROFILE_free(sk) OPENSSL_sk_free(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_zero(sk) OPENSSL_sk_zero(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_delete(sk, i) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_delete(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (i))) +#define sk_SRTP_PROTECTION_PROFILE_delete_ptr(sk, ptr) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_delete_ptr(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr))) +#define sk_SRTP_PROTECTION_PROFILE_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_pop(sk) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_pop(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_shift(sk) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_shift(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk),ossl_check_SRTP_PROTECTION_PROFILE_freefunc_type(freefunc)) +#define sk_SRTP_PROTECTION_PROFILE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr), (idx)) +#define sk_SRTP_PROTECTION_PROFILE_set(sk, idx, ptr) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_set(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (idx), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr))) +#define sk_SRTP_PROTECTION_PROFILE_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), 
ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr), pnum) +#define sk_SRTP_PROTECTION_PROFILE_sort(sk) OPENSSL_sk_sort(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_dup(sk) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_dup(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_deep_copy(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_copyfunc_type(copyfunc), ossl_check_SRTP_PROTECTION_PROFILE_freefunc_type(freefunc))) +#define sk_SRTP_PROTECTION_PROFILE_set_cmp_func(sk, cmp) ((sk_SRTP_PROTECTION_PROFILE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp))) + + + +typedef int (*tls_session_ticket_ext_cb_fn)(SSL *s, const unsigned char *data, + int len, void *arg); +typedef int (*tls_session_secret_cb_fn)(SSL *s, void *secret, int *secret_len, + STACK_OF(SSL_CIPHER) *peer_ciphers, + const SSL_CIPHER **cipher, void *arg); + +/* Extension context codes */ +/* This extension is only allowed in TLS */ +#define SSL_EXT_TLS_ONLY 0x00001 +/* This extension is only allowed in DTLS */ +#define SSL_EXT_DTLS_ONLY 0x00002 +/* Some extensions may be allowed in DTLS but we don't implement them for it */ +#define SSL_EXT_TLS_IMPLEMENTATION_ONLY 0x00004 +/* Most extensions are not defined for SSLv3 but EXT_TYPE_renegotiate is */ +#define SSL_EXT_SSL3_ALLOWED 0x00008 +/* Extension is only defined for TLS1.2 and below */ +#define SSL_EXT_TLS1_2_AND_BELOW_ONLY 0x00010 +/* Extension is only defined for TLS1.3 and above */ +#define SSL_EXT_TLS1_3_ONLY 0x00020 +/* Ignore this extension during parsing if we are resuming */ +#define SSL_EXT_IGNORE_ON_RESUMPTION 0x00040 +#define SSL_EXT_CLIENT_HELLO 0x00080 +/* Really means TLS1.2 or below */ +#define SSL_EXT_TLS1_2_SERVER_HELLO 0x00100 +#define SSL_EXT_TLS1_3_SERVER_HELLO 0x00200 +#define SSL_EXT_TLS1_3_ENCRYPTED_EXTENSIONS 0x00400 +#define SSL_EXT_TLS1_3_HELLO_RETRY_REQUEST 0x00800 +#define SSL_EXT_TLS1_3_CERTIFICATE 0x01000 +#define SSL_EXT_TLS1_3_NEW_SESSION_TICKET 0x02000 +#define SSL_EXT_TLS1_3_CERTIFICATE_REQUEST 0x04000 +#define SSL_EXT_TLS1_3_CERTIFICATE_COMPRESSION 0x08000 +/* When sending a raw public key in a certificate message */ +#define SSL_EXT_TLS1_3_RAW_PUBLIC_KEY 0x10000 + +/* Typedefs for handling custom extensions */ + +typedef int (*custom_ext_add_cb)(SSL *s, unsigned int ext_type, + const unsigned char **out, size_t *outlen, + int *al, void *add_arg); + +typedef void (*custom_ext_free_cb)(SSL *s, unsigned int ext_type, + const unsigned char *out, void *add_arg); + +typedef int (*custom_ext_parse_cb)(SSL *s, unsigned int ext_type, + const unsigned char *in, size_t inlen, + int *al, void *parse_arg); + + +typedef int (*SSL_custom_ext_add_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char **out, + size_t *outlen, X509 *x, + size_t chainidx, + int *al, void *add_arg); + +typedef void (*SSL_custom_ext_free_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char *out, + void *add_arg); + +typedef int 
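/*
 * Illustrative sketch, not part of the generated OpenSSL header: it shows one
 * plausible way the SSL_EXT_* context flags and the SSL_custom_ext_*_cb_ex
 * callback types above are combined with SSL_CTX_add_custom_ext() (declared
 * further down in this file). The callback names and the extension number
 * 65280 are hypothetical. Wrapped in "#if 0" so it cannot affect compilation.
 */
#if 0
static int my_ext_add_cb(SSL *s, unsigned int ext_type, unsigned int context,
                         const unsigned char **out, size_t *outlen, X509 *x,
                         size_t chainidx, int *al, void *add_arg)
{
    static const unsigned char payload[] = { 0x01 };

    *out = payload;             /* static data, so no free callback is needed */
    *outlen = sizeof(payload);
    return 1;                   /* 1 = include the extension, 0 = omit it */
}

static int my_ext_parse_cb(SSL *s, unsigned int ext_type, unsigned int context,
                           const unsigned char *in, size_t inlen, X509 *x,
                           size_t chainidx, int *al, void *parse_arg)
{
    return inlen == 1;          /* 0 aborts the handshake with alert *al */
}

/* Offer the extension in the ClientHello and accept it in EncryptedExtensions. */
static int register_my_ext(SSL_CTX *ctx)
{
    return SSL_CTX_add_custom_ext(ctx, 65280,
                                  SSL_EXT_CLIENT_HELLO
                                  | SSL_EXT_TLS1_3_ENCRYPTED_EXTENSIONS,
                                  my_ext_add_cb, NULL, NULL,
                                  my_ext_parse_cb, NULL);
}
#endif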
(*SSL_custom_ext_parse_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char *in, + size_t inlen, X509 *x, + size_t chainidx, + int *al, void *parse_arg); + +/* Typedef for verification callback */ +typedef int (*SSL_verify_cb)(int preverify_ok, X509_STORE_CTX *x509_ctx); + +/* Typedef for SSL async callback */ +typedef int (*SSL_async_callback_fn)(SSL *s, void *arg); + +#define SSL_OP_BIT(n) ((uint64_t)1 << (uint64_t)n) + +/* + * SSL/TLS connection options. + */ + /* Disable Extended master secret */ +# define SSL_OP_NO_EXTENDED_MASTER_SECRET SSL_OP_BIT(0) + /* Cleanse plaintext copies of data delivered to the application */ +# define SSL_OP_CLEANSE_PLAINTEXT SSL_OP_BIT(1) + /* Allow initial connection to servers that don't support RI */ +# define SSL_OP_LEGACY_SERVER_CONNECT SSL_OP_BIT(2) + /* Enable support for Kernel TLS */ +# define SSL_OP_ENABLE_KTLS SSL_OP_BIT(3) +# define SSL_OP_TLSEXT_PADDING SSL_OP_BIT(4) +# define SSL_OP_SAFARI_ECDHE_ECDSA_BUG SSL_OP_BIT(6) +# define SSL_OP_IGNORE_UNEXPECTED_EOF SSL_OP_BIT(7) +# define SSL_OP_ALLOW_CLIENT_RENEGOTIATION SSL_OP_BIT(8) +# define SSL_OP_DISABLE_TLSEXT_CA_NAMES SSL_OP_BIT(9) + /* In TLSv1.3 allow a non-(ec)dhe based kex_mode */ +# define SSL_OP_ALLOW_NO_DHE_KEX SSL_OP_BIT(10) + /* + * Disable SSL 3.0/TLS 1.0 CBC vulnerability workaround that was added + * in OpenSSL 0.9.6d. Usually (depending on the application protocol) + * the workaround is not needed. Unfortunately some broken SSL/TLS + * implementations cannot handle it at all, which is why we include it + * in SSL_OP_ALL. Added in 0.9.6e + */ +# define SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS SSL_OP_BIT(11) + /* DTLS options */ +# define SSL_OP_NO_QUERY_MTU SSL_OP_BIT(12) + /* Turn on Cookie Exchange (on relevant for servers) */ +# define SSL_OP_COOKIE_EXCHANGE SSL_OP_BIT(13) + /* Don't use RFC4507 ticket extension */ +# define SSL_OP_NO_TICKET SSL_OP_BIT(14) +# ifndef OPENSSL_NO_DTLS1_METHOD + /* + * Use Cisco's version identifier of DTLS_BAD_VER + * (only with deprecated DTLSv1_client_method()) + */ +# define SSL_OP_CISCO_ANYCONNECT SSL_OP_BIT(15) +# endif + /* As server, disallow session resumption on renegotiation */ +# define SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION SSL_OP_BIT(16) + /* Don't use compression even if supported */ +# define SSL_OP_NO_COMPRESSION SSL_OP_BIT(17) + /* Permit unsafe legacy renegotiation */ +# define SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION SSL_OP_BIT(18) + /* Disable encrypt-then-mac */ +# define SSL_OP_NO_ENCRYPT_THEN_MAC SSL_OP_BIT(19) + /* + * Enable TLSv1.3 Compatibility mode. This is on by default. A future + * version of OpenSSL may have this disabled by default. + */ +# define SSL_OP_ENABLE_MIDDLEBOX_COMPAT SSL_OP_BIT(20) + /* + * Prioritize Chacha20Poly1305 when client does. + * Modifies SSL_OP_CIPHER_SERVER_PREFERENCE + */ +# define SSL_OP_PRIORITIZE_CHACHA SSL_OP_BIT(21) + /* + * Set on servers to choose the cipher according to server's preferences. + */ +# define SSL_OP_CIPHER_SERVER_PREFERENCE SSL_OP_BIT(22) + /* + * If set, a server will allow a client to issue a SSLv3.0 version + * number as latest version supported in the premaster secret, even when + * TLSv1.0 (version 3.1) was announced in the client hello. Normally + * this is forbidden to prevent version rollback attacks. + */ +# define SSL_OP_TLS_ROLLBACK_BUG SSL_OP_BIT(23) + /* + * Switches off automatic TLSv1.3 anti-replay protection for early data. + * This is a server-side option only (no effect on the client). 
+ */ +# define SSL_OP_NO_ANTI_REPLAY SSL_OP_BIT(24) +# define SSL_OP_NO_SSLv3 SSL_OP_BIT(25) +# define SSL_OP_NO_TLSv1 SSL_OP_BIT(26) +# define SSL_OP_NO_TLSv1_2 SSL_OP_BIT(27) +# define SSL_OP_NO_TLSv1_1 SSL_OP_BIT(28) +# define SSL_OP_NO_TLSv1_3 SSL_OP_BIT(29) +# define SSL_OP_NO_DTLSv1 SSL_OP_BIT(26) +# define SSL_OP_NO_DTLSv1_2 SSL_OP_BIT(27) + /* Disallow all renegotiation */ +# define SSL_OP_NO_RENEGOTIATION SSL_OP_BIT(30) + /* + * Make server add server-hello extension from early version of + * cryptopro draft, when GOST ciphersuite is negotiated. Required for + * interoperability with CryptoPro CSP 3.x + */ +# define SSL_OP_CRYPTOPRO_TLSEXT_BUG SSL_OP_BIT(31) +/* + * Disable RFC8879 certificate compression + * SSL_OP_NO_TX_CERTIFICATE_COMPRESSION: don't send compressed certificates, + * and ignore the extension when received. + * SSL_OP_NO_RX_CERTIFICATE_COMPRESSION: don't send the extension, and + * subsequently indicating that receiving is not supported + */ +# define SSL_OP_NO_TX_CERTIFICATE_COMPRESSION SSL_OP_BIT(32) +# define SSL_OP_NO_RX_CERTIFICATE_COMPRESSION SSL_OP_BIT(33) + /* Enable KTLS TX zerocopy on Linux */ +# define SSL_OP_ENABLE_KTLS_TX_ZEROCOPY_SENDFILE SSL_OP_BIT(34) + +/* + * Option "collections." + */ +# define SSL_OP_NO_SSL_MASK \ + ( SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | SSL_OP_NO_TLSv1_1 \ + | SSL_OP_NO_TLSv1_2 | SSL_OP_NO_TLSv1_3 ) +# define SSL_OP_NO_DTLS_MASK \ + ( SSL_OP_NO_DTLSv1 | SSL_OP_NO_DTLSv1_2 ) + +/* Various bug workarounds that should be rather harmless. */ +# define SSL_OP_ALL \ + ( SSL_OP_CRYPTOPRO_TLSEXT_BUG | SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS \ + | SSL_OP_TLSEXT_PADDING | SSL_OP_SAFARI_ECDHE_ECDSA_BUG ) + +/* + * OBSOLETE OPTIONS retained for compatibility + */ + +# define SSL_OP_MICROSOFT_SESS_ID_BUG 0x0 +# define SSL_OP_NETSCAPE_CHALLENGE_BUG 0x0 +# define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x0 +# define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0 +# define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x0 +# define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 +# define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x0 +# define SSL_OP_TLS_D5_BUG 0x0 +# define SSL_OP_TLS_BLOCK_PADDING_BUG 0x0 +# define SSL_OP_SINGLE_ECDH_USE 0x0 +# define SSL_OP_SINGLE_DH_USE 0x0 +# define SSL_OP_EPHEMERAL_RSA 0x0 +# define SSL_OP_NO_SSLv2 0x0 +# define SSL_OP_PKCS1_CHECK_1 0x0 +# define SSL_OP_PKCS1_CHECK_2 0x0 +# define SSL_OP_NETSCAPE_CA_DN_BUG 0x0 +# define SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG 0x0 + +/* + * Allow SSL_write(..., n) to return r with 0 < r < n (i.e. report success + * when just a single record has been written): + */ +# define SSL_MODE_ENABLE_PARTIAL_WRITE 0x00000001U +/* + * Make it possible to retry SSL_write() with changed buffer location (buffer + * contents must stay the same!); this is not the default to avoid the + * misconception that non-blocking SSL_write() behaves like non-blocking + * write(): + */ +# define SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER 0x00000002U +/* + * Never bother the application with retries if the transport is blocking: + */ +# define SSL_MODE_AUTO_RETRY 0x00000004U +/* Don't attempt to automatically build certificate chain */ +# define SSL_MODE_NO_AUTO_CHAIN 0x00000008U +/* + * Save RAM by releasing read and write buffers when they're empty. (SSL3 and + * TLS only.) Released buffers are freed. + */ +# define SSL_MODE_RELEASE_BUFFERS 0x00000010U +/* + * Send the current time in the Random fields of the ClientHello and + * ServerHello records for compatibility with hypothetical implementations + * that require it. 
+ */ +# define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020U +# define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040U +/* + * Send TLS_FALLBACK_SCSV in the ClientHello. To be set only by applications + * that reconnect with a downgraded protocol version; see + * draft-ietf-tls-downgrade-scsv-00 for details. DO NOT ENABLE THIS if your + * application attempts a normal handshake. Only use this in explicit + * fallback retries, following the guidance in + * draft-ietf-tls-downgrade-scsv-00. + */ +# define SSL_MODE_SEND_FALLBACK_SCSV 0x00000080U +/* + * Support Asynchronous operation + */ +# define SSL_MODE_ASYNC 0x00000100U + +/* + * When using DTLS/SCTP, include the terminating zero in the label + * used for computing the endpoint-pair shared secret. Required for + * interoperability with implementations having this bug like these + * older version of OpenSSL: + * - OpenSSL 1.0.0 series + * - OpenSSL 1.0.1 series + * - OpenSSL 1.0.2 series + * - OpenSSL 1.1.0 series + * - OpenSSL 1.1.1 and 1.1.1a + */ +# define SSL_MODE_DTLS_SCTP_LABEL_LENGTH_BUG 0x00000400U + +/* Cert related flags */ +/* + * Many implementations ignore some aspects of the TLS standards such as + * enforcing certificate chain algorithms. When this is set we enforce them. + */ +# define SSL_CERT_FLAG_TLS_STRICT 0x00000001U + +/* Suite B modes, takes same values as certificate verify flags */ +# define SSL_CERT_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define SSL_CERT_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define SSL_CERT_FLAG_SUITEB_128_LOS 0x30000 + +/* Perform all sorts of protocol violations for testing purposes */ +# define SSL_CERT_FLAG_BROKEN_PROTOCOL 0x10000000 + +/* Flags for building certificate chains */ +/* Treat any existing certificates as untrusted CAs */ +# define SSL_BUILD_CHAIN_FLAG_UNTRUSTED 0x1 +/* Don't include root CA in chain */ +# define SSL_BUILD_CHAIN_FLAG_NO_ROOT 0x2 +/* Just check certificates already there */ +# define SSL_BUILD_CHAIN_FLAG_CHECK 0x4 +/* Ignore verification errors */ +# define SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR 0x8 +/* Clear verification errors from queue */ +# define SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR 0x10 + +/* Flags returned by SSL_check_chain */ +/* Certificate can be used with this session */ +# define CERT_PKEY_VALID 0x1 +/* Certificate can also be used for signing */ +# define CERT_PKEY_SIGN 0x2 +/* EE certificate signing algorithm OK */ +# define CERT_PKEY_EE_SIGNATURE 0x10 +/* CA signature algorithms OK */ +# define CERT_PKEY_CA_SIGNATURE 0x20 +/* EE certificate parameters OK */ +# define CERT_PKEY_EE_PARAM 0x40 +/* CA certificate parameters OK */ +# define CERT_PKEY_CA_PARAM 0x80 +/* Signing explicitly allowed as opposed to SHA1 fallback */ +# define CERT_PKEY_EXPLICIT_SIGN 0x100 +/* Client CA issuer names match (always set for server cert) */ +# define CERT_PKEY_ISSUER_NAME 0x200 +/* Cert type matches client types (always set for server cert) */ +# define CERT_PKEY_CERT_TYPE 0x400 +/* Cert chain suitable to Suite B */ +# define CERT_PKEY_SUITEB 0x800 +/* Cert pkey valid for raw public key use */ +# define CERT_PKEY_RPK 0x1000 + +# define SSL_CONF_FLAG_CMDLINE 0x1 +# define SSL_CONF_FLAG_FILE 0x2 +# define SSL_CONF_FLAG_CLIENT 0x4 +# define SSL_CONF_FLAG_SERVER 0x8 +# define SSL_CONF_FLAG_SHOW_ERRORS 0x10 +# define SSL_CONF_FLAG_CERTIFICATE 0x20 +# define SSL_CONF_FLAG_REQUIRE_PRIVATE 0x40 +/* Configuration value types */ +# define SSL_CONF_TYPE_UNKNOWN 0x0 +# define SSL_CONF_TYPE_STRING 0x1 +# define 
SSL_CONF_TYPE_FILE 0x2 +# define SSL_CONF_TYPE_DIR 0x3 +# define SSL_CONF_TYPE_NONE 0x4 +# define SSL_CONF_TYPE_STORE 0x5 + +/* Maximum length of the application-controlled segment of a a TLSv1.3 cookie */ +# define SSL_COOKIE_LENGTH 4096 + +/* + * Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value, they + * cannot be used to clear bits. + */ + +uint64_t SSL_CTX_get_options(const SSL_CTX *ctx); +uint64_t SSL_get_options(const SSL *s); +uint64_t SSL_CTX_clear_options(SSL_CTX *ctx, uint64_t op); +uint64_t SSL_clear_options(SSL *s, uint64_t op); +uint64_t SSL_CTX_set_options(SSL_CTX *ctx, uint64_t op); +uint64_t SSL_set_options(SSL *s, uint64_t op); + +# define SSL_CTX_set_mode(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_MODE,(op),NULL) +# define SSL_CTX_clear_mode(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_MODE,(op),NULL) +# define SSL_CTX_get_mode(ctx) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_MODE,0,NULL) +# define SSL_clear_mode(ssl,op) \ + SSL_ctrl((ssl),SSL_CTRL_CLEAR_MODE,(op),NULL) +# define SSL_set_mode(ssl,op) \ + SSL_ctrl((ssl),SSL_CTRL_MODE,(op),NULL) +# define SSL_get_mode(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_MODE,0,NULL) +# define SSL_set_mtu(ssl, mtu) \ + SSL_ctrl((ssl),SSL_CTRL_SET_MTU,(mtu),NULL) +# define DTLS_set_link_mtu(ssl, mtu) \ + SSL_ctrl((ssl),DTLS_CTRL_SET_LINK_MTU,(mtu),NULL) +# define DTLS_get_link_min_mtu(ssl) \ + SSL_ctrl((ssl),DTLS_CTRL_GET_LINK_MIN_MTU,0,NULL) + +# define SSL_get_secure_renegotiation_support(ssl) \ + SSL_ctrl((ssl), SSL_CTRL_GET_RI_SUPPORT, 0, NULL) + +# define SSL_CTX_set_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_set_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_CTX_clear_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) +# define SSL_clear_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) + +void SSL_CTX_set_msg_callback(SSL_CTX *ctx, + void (*cb) (int write_p, int version, + int content_type, const void *buf, + size_t len, SSL *ssl, void *arg)); +void SSL_set_msg_callback(SSL *ssl, + void (*cb) (int write_p, int version, + int content_type, const void *buf, + size_t len, SSL *ssl, void *arg)); +# define SSL_CTX_set_msg_callback_arg(ctx, arg) SSL_CTX_ctrl((ctx), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) +# define SSL_set_msg_callback_arg(ssl, arg) SSL_ctrl((ssl), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) + +# define SSL_get_extms_support(s) \ + SSL_ctrl((s),SSL_CTRL_GET_EXTMS_SUPPORT,0,NULL) + +# ifndef OPENSSL_NO_SRP +/* see tls_srp.c */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 __owur int SSL_SRP_CTX_init(SSL *s); +OSSL_DEPRECATEDIN_3_0 __owur int SSL_CTX_SRP_CTX_init(SSL_CTX *ctx); +OSSL_DEPRECATEDIN_3_0 int SSL_SRP_CTX_free(SSL *ctx); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_SRP_CTX_free(SSL_CTX *ctx); +OSSL_DEPRECATEDIN_3_0 __owur int SSL_srp_server_param_with_username(SSL *s, + int *ad); +OSSL_DEPRECATEDIN_3_0 __owur int SRP_Calc_A_param(SSL *s); +# endif +# endif + +/* 100k max cert list */ +# define SSL_MAX_CERT_LIST_DEFAULT (1024*100) + +# define SSL_SESSION_CACHE_MAX_SIZE_DEFAULT (1024*20) + +/* + * This callback type is used inside SSL_CTX, SSL, and in the functions that + * set them. It is used to override the generation of SSL/TLS session IDs in + * a server. Return value should be zero on an error, non-zero to proceed. 
+ * Also, callbacks should themselves check if the id they generate is unique + * otherwise the SSL handshake will fail with an error - callbacks can do + * this using the 'ssl' value they're passed by; + * SSL_has_matching_session_id(ssl, id, *id_len) The length value passed in + * is set at the maximum size the session ID can be. In SSLv3/TLSv1 it is 32 + * bytes. The callback can alter this length to be less if desired. It is + * also an error for the callback to set the size to zero. + */ +typedef int (*GEN_SESSION_CB) (SSL *ssl, unsigned char *id, + unsigned int *id_len); + +# define SSL_SESS_CACHE_OFF 0x0000 +# define SSL_SESS_CACHE_CLIENT 0x0001 +# define SSL_SESS_CACHE_SERVER 0x0002 +# define SSL_SESS_CACHE_BOTH (SSL_SESS_CACHE_CLIENT|SSL_SESS_CACHE_SERVER) +# define SSL_SESS_CACHE_NO_AUTO_CLEAR 0x0080 +/* enough comments already ... see SSL_CTX_set_session_cache_mode(3) */ +# define SSL_SESS_CACHE_NO_INTERNAL_LOOKUP 0x0100 +# define SSL_SESS_CACHE_NO_INTERNAL_STORE 0x0200 +# define SSL_SESS_CACHE_NO_INTERNAL \ + (SSL_SESS_CACHE_NO_INTERNAL_LOOKUP|SSL_SESS_CACHE_NO_INTERNAL_STORE) +# define SSL_SESS_CACHE_UPDATE_TIME 0x0400 + +LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx); +# define SSL_CTX_sess_number(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_NUMBER,0,NULL) +# define SSL_CTX_sess_connect(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT,0,NULL) +# define SSL_CTX_sess_connect_good(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT_GOOD,0,NULL) +# define SSL_CTX_sess_connect_renegotiate(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT_RENEGOTIATE,0,NULL) +# define SSL_CTX_sess_accept(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT,0,NULL) +# define SSL_CTX_sess_accept_renegotiate(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT_RENEGOTIATE,0,NULL) +# define SSL_CTX_sess_accept_good(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT_GOOD,0,NULL) +# define SSL_CTX_sess_hits(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_HIT,0,NULL) +# define SSL_CTX_sess_cb_hits(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CB_HIT,0,NULL) +# define SSL_CTX_sess_misses(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_MISSES,0,NULL) +# define SSL_CTX_sess_timeouts(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_TIMEOUTS,0,NULL) +# define SSL_CTX_sess_cache_full(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CACHE_FULL,0,NULL) + +void SSL_CTX_sess_set_new_cb(SSL_CTX *ctx, + int (*new_session_cb) (struct ssl_st *ssl, + SSL_SESSION *sess)); +int (*SSL_CTX_sess_get_new_cb(SSL_CTX *ctx)) (struct ssl_st *ssl, + SSL_SESSION *sess); +void SSL_CTX_sess_set_remove_cb(SSL_CTX *ctx, + void (*remove_session_cb) (struct ssl_ctx_st + *ctx, + SSL_SESSION *sess)); +void (*SSL_CTX_sess_get_remove_cb(SSL_CTX *ctx)) (struct ssl_ctx_st *ctx, + SSL_SESSION *sess); +void SSL_CTX_sess_set_get_cb(SSL_CTX *ctx, + SSL_SESSION *(*get_session_cb) (struct ssl_st + *ssl, + const unsigned char + *data, int len, + int *copy)); +SSL_SESSION *(*SSL_CTX_sess_get_get_cb(SSL_CTX *ctx)) (struct ssl_st *ssl, + const unsigned char *data, + int len, int *copy); +void SSL_CTX_set_info_callback(SSL_CTX *ctx, + void (*cb) (const SSL *ssl, int type, int val)); +void (*SSL_CTX_get_info_callback(SSL_CTX *ctx)) (const SSL *ssl, int type, + int val); +void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, + int (*client_cert_cb) (SSL *ssl, X509 **x509, + EVP_PKEY **pkey)); +int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx)) (SSL *ssl, X509 **x509, + EVP_PKEY **pkey); +# ifndef OPENSSL_NO_ENGINE +__owur int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e); +# endif +void 
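/*
 * Illustrative sketch, not part of the generated OpenSSL header: a callback of
 * the GEN_SESSION_CB type described above. It fills the buffer with random
 * bytes and retries until SSL_has_matching_session_id() reports the id as
 * unused, returning 0 on failure. The function name is hypothetical;
 * registration is assumed to go through SSL_CTX_set_generate_session_id(),
 * declared further down in this header. Wrapped in "#if 0" so it cannot
 * affect compilation.
 */
#if 0
# include <openssl/rand.h>

static int my_generate_session_id(SSL *ssl, unsigned char *id,
                                  unsigned int *id_len)
{
    unsigned int tries = 10;

    do {
        if (RAND_bytes(id, (int)*id_len) <= 0)
            return 0;                        /* error: the handshake will fail */
    } while (SSL_has_matching_session_id(ssl, id, *id_len) && --tries > 0);

    return tries > 0;                        /* non-zero: id accepted as unique */
}

/* Typically installed once on the server's SSL_CTX:                          */
/*   SSL_CTX_set_generate_session_id(ctx, my_generate_session_id);            */
#endif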
SSL_CTX_set_cookie_generate_cb(SSL_CTX *ctx, + int (*app_gen_cookie_cb) (SSL *ssl, + unsigned char + *cookie, + unsigned int + *cookie_len)); +void SSL_CTX_set_cookie_verify_cb(SSL_CTX *ctx, + int (*app_verify_cookie_cb) (SSL *ssl, + const unsigned + char *cookie, + unsigned int + cookie_len)); + +void SSL_CTX_set_stateless_cookie_generate_cb( + SSL_CTX *ctx, + int (*gen_stateless_cookie_cb) (SSL *ssl, + unsigned char *cookie, + size_t *cookie_len)); +void SSL_CTX_set_stateless_cookie_verify_cb( + SSL_CTX *ctx, + int (*verify_stateless_cookie_cb) (SSL *ssl, + const unsigned char *cookie, + size_t cookie_len)); +# ifndef OPENSSL_NO_NEXTPROTONEG + +typedef int (*SSL_CTX_npn_advertised_cb_func)(SSL *ssl, + const unsigned char **out, + unsigned int *outlen, + void *arg); +void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s, + SSL_CTX_npn_advertised_cb_func cb, + void *arg); +# define SSL_CTX_set_npn_advertised_cb SSL_CTX_set_next_protos_advertised_cb + +typedef int (*SSL_CTX_npn_select_cb_func)(SSL *s, + unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg); +void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s, + SSL_CTX_npn_select_cb_func cb, + void *arg); +# define SSL_CTX_set_npn_select_cb SSL_CTX_set_next_proto_select_cb + +void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, + unsigned *len); +# define SSL_get0_npn_negotiated SSL_get0_next_proto_negotiated +# endif + +__owur int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, + const unsigned char *in, unsigned int inlen, + const unsigned char *client, + unsigned int client_len); + +# define OPENSSL_NPN_UNSUPPORTED 0 +# define OPENSSL_NPN_NEGOTIATED 1 +# define OPENSSL_NPN_NO_OVERLAP 2 + +__owur int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char *protos, + unsigned int protos_len); +__owur int SSL_set_alpn_protos(SSL *ssl, const unsigned char *protos, + unsigned int protos_len); +typedef int (*SSL_CTX_alpn_select_cb_func)(SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg); +void SSL_CTX_set_alpn_select_cb(SSL_CTX *ctx, + SSL_CTX_alpn_select_cb_func cb, + void *arg); +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned int *len); + +# ifndef OPENSSL_NO_PSK +/* + * the maximum length of the buffer given to callbacks containing the + * resulting identity/psk + */ +# define PSK_MAX_IDENTITY_LEN 256 +# define PSK_MAX_PSK_LEN 512 +typedef unsigned int (*SSL_psk_client_cb_func)(SSL *ssl, + const char *hint, + char *identity, + unsigned int max_identity_len, + unsigned char *psk, + unsigned int max_psk_len); +void SSL_CTX_set_psk_client_callback(SSL_CTX *ctx, SSL_psk_client_cb_func cb); +void SSL_set_psk_client_callback(SSL *ssl, SSL_psk_client_cb_func cb); + +typedef unsigned int (*SSL_psk_server_cb_func)(SSL *ssl, + const char *identity, + unsigned char *psk, + unsigned int max_psk_len); +void SSL_CTX_set_psk_server_callback(SSL_CTX *ctx, SSL_psk_server_cb_func cb); +void SSL_set_psk_server_callback(SSL *ssl, SSL_psk_server_cb_func cb); + +__owur int SSL_CTX_use_psk_identity_hint(SSL_CTX *ctx, const char *identity_hint); +__owur int SSL_use_psk_identity_hint(SSL *s, const char *identity_hint); +const char *SSL_get_psk_identity_hint(const SSL *s); +const char *SSL_get_psk_identity(const SSL *s); +# endif + +typedef int (*SSL_psk_find_session_cb_func)(SSL *ssl, + const unsigned char *identity, + size_t identity_len, + SSL_SESSION 
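/*
 * Illustrative sketch, not part of the generated OpenSSL header: server-side
 * ALPN selection using the SSL_CTX_alpn_select_cb_func type and
 * SSL_select_next_proto() declared above. The protocol list uses the
 * length-prefixed wire format expected by SSL_CTX_set_alpn_protos() /
 * SSL_set_alpn_protos(); the SSL_TLSEXT_ERR_* constants come from
 * <openssl/tls1.h>. Names prefixed my_ are hypothetical. Wrapped in "#if 0"
 * so it cannot affect compilation.
 */
#if 0
/* ALPN protocol lists are length-prefixed, not NUL-terminated. */
static const unsigned char my_alpn[] = {
    2, 'h', '2',
    8, 'h', 't', 't', 'p', '/', '1', '.', '1'
};

static int my_alpn_select_cb(SSL *ssl, const unsigned char **out,
                             unsigned char *outlen, const unsigned char *in,
                             unsigned int inlen, void *arg)
{
    unsigned char *selected;

    if (SSL_select_next_proto(&selected, outlen, my_alpn, sizeof(my_alpn),
                              in, inlen) != OPENSSL_NPN_NEGOTIATED)
        return SSL_TLSEXT_ERR_ALERT_FATAL;   /* no protocol in common */
    *out = selected;                         /* points into my_alpn */
    return SSL_TLSEXT_ERR_OK;
}

/* Server: SSL_CTX_set_alpn_select_cb(ctx, my_alpn_select_cb, NULL);          */
/* Client: SSL_CTX_set_alpn_protos(ctx, my_alpn, sizeof(my_alpn));            */
/*         (note: this setter returns 0 on success, non-zero on failure)      */
#endif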
**sess); +typedef int (*SSL_psk_use_session_cb_func)(SSL *ssl, const EVP_MD *md, + const unsigned char **id, + size_t *idlen, + SSL_SESSION **sess); + +void SSL_set_psk_find_session_callback(SSL *s, SSL_psk_find_session_cb_func cb); +void SSL_CTX_set_psk_find_session_callback(SSL_CTX *ctx, + SSL_psk_find_session_cb_func cb); +void SSL_set_psk_use_session_callback(SSL *s, SSL_psk_use_session_cb_func cb); +void SSL_CTX_set_psk_use_session_callback(SSL_CTX *ctx, + SSL_psk_use_session_cb_func cb); + +/* Register callbacks to handle custom TLS Extensions for client or server. */ + +__owur int SSL_CTX_has_client_custom_ext(const SSL_CTX *ctx, + unsigned int ext_type); + +__owur int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, + unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +__owur int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, + unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +__owur int SSL_CTX_add_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + unsigned int context, + SSL_custom_ext_add_cb_ex add_cb, + SSL_custom_ext_free_cb_ex free_cb, + void *add_arg, + SSL_custom_ext_parse_cb_ex parse_cb, + void *parse_arg); + +__owur int SSL_extension_supported(unsigned int ext_type); + +# define SSL_NOTHING 1 +# define SSL_WRITING 2 +# define SSL_READING 3 +# define SSL_X509_LOOKUP 4 +# define SSL_ASYNC_PAUSED 5 +# define SSL_ASYNC_NO_JOBS 6 +# define SSL_CLIENT_HELLO_CB 7 +# define SSL_RETRY_VERIFY 8 + +/* These will only be used when doing non-blocking IO */ +# define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING) +# define SSL_want_read(s) (SSL_want(s) == SSL_READING) +# define SSL_want_write(s) (SSL_want(s) == SSL_WRITING) +# define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP) +# define SSL_want_retry_verify(s) (SSL_want(s) == SSL_RETRY_VERIFY) +# define SSL_want_async(s) (SSL_want(s) == SSL_ASYNC_PAUSED) +# define SSL_want_async_job(s) (SSL_want(s) == SSL_ASYNC_NO_JOBS) +# define SSL_want_client_hello_cb(s) (SSL_want(s) == SSL_CLIENT_HELLO_CB) + +# define SSL_MAC_FLAG_READ_MAC_STREAM 1 +# define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 +# define SSL_MAC_FLAG_READ_MAC_TLSTREE 4 +# define SSL_MAC_FLAG_WRITE_MAC_TLSTREE 8 + +/* + * A callback for logging out TLS key material. This callback should log out + * |line| followed by a newline. + */ +typedef void (*SSL_CTX_keylog_cb_func)(const SSL *ssl, const char *line); + +/* + * SSL_CTX_set_keylog_callback configures a callback to log key material. This + * is intended for debugging use with tools like Wireshark. The cb function + * should log line followed by a newline. + */ +void SSL_CTX_set_keylog_callback(SSL_CTX *ctx, SSL_CTX_keylog_cb_func cb); + +/* + * SSL_CTX_get_keylog_callback returns the callback configured by + * SSL_CTX_set_keylog_callback. 
+ */ +SSL_CTX_keylog_cb_func SSL_CTX_get_keylog_callback(const SSL_CTX *ctx); + +int SSL_CTX_set_max_early_data(SSL_CTX *ctx, uint32_t max_early_data); +uint32_t SSL_CTX_get_max_early_data(const SSL_CTX *ctx); +int SSL_set_max_early_data(SSL *s, uint32_t max_early_data); +uint32_t SSL_get_max_early_data(const SSL *s); +int SSL_CTX_set_recv_max_early_data(SSL_CTX *ctx, uint32_t recv_max_early_data); +uint32_t SSL_CTX_get_recv_max_early_data(const SSL_CTX *ctx); +int SSL_set_recv_max_early_data(SSL *s, uint32_t recv_max_early_data); +uint32_t SSL_get_recv_max_early_data(const SSL *s); + +#ifdef __cplusplus +} +#endif + +# include +# include +# include /* This is mostly sslv3 with a few tweaks */ +# include /* Datagram TLS */ +# include /* Support for the use_srtp extension */ +# include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * These need to be after the above set of includes due to a compiler bug + * in VisualStudio 2015 + */ +SKM_DEFINE_STACK_OF_INTERNAL(SSL_CIPHER, const SSL_CIPHER, SSL_CIPHER) +#define sk_SSL_CIPHER_num(sk) OPENSSL_sk_num(ossl_check_const_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_value(sk, idx) ((const SSL_CIPHER *)OPENSSL_sk_value(ossl_check_const_SSL_CIPHER_sk_type(sk), (idx))) +#define sk_SSL_CIPHER_new(cmp) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new(ossl_check_SSL_CIPHER_compfunc_type(cmp))) +#define sk_SSL_CIPHER_new_null() ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new_null()) +#define sk_SSL_CIPHER_new_reserve(cmp, n) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new_reserve(ossl_check_SSL_CIPHER_compfunc_type(cmp), (n))) +#define sk_SSL_CIPHER_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SSL_CIPHER_sk_type(sk), (n)) +#define sk_SSL_CIPHER_free(sk) OPENSSL_sk_free(ossl_check_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_zero(sk) OPENSSL_sk_zero(ossl_check_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_delete(sk, i) ((const SSL_CIPHER *)OPENSSL_sk_delete(ossl_check_SSL_CIPHER_sk_type(sk), (i))) +#define sk_SSL_CIPHER_delete_ptr(sk, ptr) ((const SSL_CIPHER *)OPENSSL_sk_delete_ptr(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr))) +#define sk_SSL_CIPHER_push(sk, ptr) OPENSSL_sk_push(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_pop(sk) ((const SSL_CIPHER *)OPENSSL_sk_pop(ossl_check_SSL_CIPHER_sk_type(sk))) +#define sk_SSL_CIPHER_shift(sk) ((const SSL_CIPHER *)OPENSSL_sk_shift(ossl_check_SSL_CIPHER_sk_type(sk))) +#define sk_SSL_CIPHER_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SSL_CIPHER_sk_type(sk),ossl_check_SSL_CIPHER_freefunc_type(freefunc)) +#define sk_SSL_CIPHER_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr), (idx)) +#define sk_SSL_CIPHER_set(sk, idx, ptr) ((const SSL_CIPHER *)OPENSSL_sk_set(ossl_check_SSL_CIPHER_sk_type(sk), (idx), ossl_check_SSL_CIPHER_type(ptr))) +#define sk_SSL_CIPHER_find(sk, ptr) OPENSSL_sk_find(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr), pnum) +#define sk_SSL_CIPHER_sort(sk) OPENSSL_sk_sort(ossl_check_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_is_sorted(sk) 
OPENSSL_sk_is_sorted(ossl_check_const_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_dup(sk) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_dup(ossl_check_const_SSL_CIPHER_sk_type(sk))) +#define sk_SSL_CIPHER_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_deep_copy(ossl_check_const_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_copyfunc_type(copyfunc), ossl_check_SSL_CIPHER_freefunc_type(freefunc))) +#define sk_SSL_CIPHER_set_cmp_func(sk, cmp) ((sk_SSL_CIPHER_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(SSL_COMP, SSL_COMP, SSL_COMP) +#define sk_SSL_COMP_num(sk) OPENSSL_sk_num(ossl_check_const_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_value(sk, idx) ((SSL_COMP *)OPENSSL_sk_value(ossl_check_const_SSL_COMP_sk_type(sk), (idx))) +#define sk_SSL_COMP_new(cmp) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new(ossl_check_SSL_COMP_compfunc_type(cmp))) +#define sk_SSL_COMP_new_null() ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new_null()) +#define sk_SSL_COMP_new_reserve(cmp, n) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new_reserve(ossl_check_SSL_COMP_compfunc_type(cmp), (n))) +#define sk_SSL_COMP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SSL_COMP_sk_type(sk), (n)) +#define sk_SSL_COMP_free(sk) OPENSSL_sk_free(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_zero(sk) OPENSSL_sk_zero(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_delete(sk, i) ((SSL_COMP *)OPENSSL_sk_delete(ossl_check_SSL_COMP_sk_type(sk), (i))) +#define sk_SSL_COMP_delete_ptr(sk, ptr) ((SSL_COMP *)OPENSSL_sk_delete_ptr(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr))) +#define sk_SSL_COMP_push(sk, ptr) OPENSSL_sk_push(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_pop(sk) ((SSL_COMP *)OPENSSL_sk_pop(ossl_check_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_shift(sk) ((SSL_COMP *)OPENSSL_sk_shift(ossl_check_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SSL_COMP_sk_type(sk),ossl_check_SSL_COMP_freefunc_type(freefunc)) +#define sk_SSL_COMP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr), (idx)) +#define sk_SSL_COMP_set(sk, idx, ptr) ((SSL_COMP *)OPENSSL_sk_set(ossl_check_SSL_COMP_sk_type(sk), (idx), ossl_check_SSL_COMP_type(ptr))) +#define sk_SSL_COMP_find(sk, ptr) OPENSSL_sk_find(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr), pnum) +#define sk_SSL_COMP_sort(sk) OPENSSL_sk_sort(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_dup(sk) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_dup(ossl_check_const_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_deep_copy(ossl_check_const_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_copyfunc_type(copyfunc), ossl_check_SSL_COMP_freefunc_type(freefunc))) +#define sk_SSL_COMP_set_cmp_func(sk, cmp) ((sk_SSL_COMP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_compfunc_type(cmp))) + + +/* 
compatibility */ +# define SSL_set_app_data(s,arg) (SSL_set_ex_data(s,0,(char *)(arg))) +# define SSL_get_app_data(s) (SSL_get_ex_data(s,0)) +# define SSL_SESSION_set_app_data(s,a) (SSL_SESSION_set_ex_data(s,0, \ + (char *)(a))) +# define SSL_SESSION_get_app_data(s) (SSL_SESSION_get_ex_data(s,0)) +# define SSL_CTX_get_app_data(ctx) (SSL_CTX_get_ex_data(ctx,0)) +# define SSL_CTX_set_app_data(ctx,arg) (SSL_CTX_set_ex_data(ctx,0, \ + (char *)(arg))) +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void SSL_set_debug(SSL *s, int debug); +# endif + +/* TLSv1.3 KeyUpdate message types */ +/* -1 used so that this is an invalid value for the on-the-wire protocol */ +#define SSL_KEY_UPDATE_NONE -1 +/* Values as defined for the on-the-wire protocol */ +#define SSL_KEY_UPDATE_NOT_REQUESTED 0 +#define SSL_KEY_UPDATE_REQUESTED 1 + +/* + * The valid handshake states (one for each type message sent and one for each + * type of message received). There are also two "special" states: + * TLS = TLS or DTLS state + * DTLS = DTLS specific state + * CR/SR = Client Read/Server Read + * CW/SW = Client Write/Server Write + * + * The "special" states are: + * TLS_ST_BEFORE = No handshake has been initiated yet + * TLS_ST_OK = A handshake has been successfully completed + */ +typedef enum { + TLS_ST_BEFORE, + TLS_ST_OK, + DTLS_ST_CR_HELLO_VERIFY_REQUEST, + TLS_ST_CR_SRVR_HELLO, + TLS_ST_CR_CERT, + TLS_ST_CR_COMP_CERT, + TLS_ST_CR_CERT_STATUS, + TLS_ST_CR_KEY_EXCH, + TLS_ST_CR_CERT_REQ, + TLS_ST_CR_SRVR_DONE, + TLS_ST_CR_SESSION_TICKET, + TLS_ST_CR_CHANGE, + TLS_ST_CR_FINISHED, + TLS_ST_CW_CLNT_HELLO, + TLS_ST_CW_CERT, + TLS_ST_CW_COMP_CERT, + TLS_ST_CW_KEY_EXCH, + TLS_ST_CW_CERT_VRFY, + TLS_ST_CW_CHANGE, + TLS_ST_CW_NEXT_PROTO, + TLS_ST_CW_FINISHED, + TLS_ST_SW_HELLO_REQ, + TLS_ST_SR_CLNT_HELLO, + DTLS_ST_SW_HELLO_VERIFY_REQUEST, + TLS_ST_SW_SRVR_HELLO, + TLS_ST_SW_CERT, + TLS_ST_SW_COMP_CERT, + TLS_ST_SW_KEY_EXCH, + TLS_ST_SW_CERT_REQ, + TLS_ST_SW_SRVR_DONE, + TLS_ST_SR_CERT, + TLS_ST_SR_COMP_CERT, + TLS_ST_SR_KEY_EXCH, + TLS_ST_SR_CERT_VRFY, + TLS_ST_SR_NEXT_PROTO, + TLS_ST_SR_CHANGE, + TLS_ST_SR_FINISHED, + TLS_ST_SW_SESSION_TICKET, + TLS_ST_SW_CERT_STATUS, + TLS_ST_SW_CHANGE, + TLS_ST_SW_FINISHED, + TLS_ST_SW_ENCRYPTED_EXTENSIONS, + TLS_ST_CR_ENCRYPTED_EXTENSIONS, + TLS_ST_CR_CERT_VRFY, + TLS_ST_SW_CERT_VRFY, + TLS_ST_CR_HELLO_REQ, + TLS_ST_SW_KEY_UPDATE, + TLS_ST_CW_KEY_UPDATE, + TLS_ST_SR_KEY_UPDATE, + TLS_ST_CR_KEY_UPDATE, + TLS_ST_EARLY_DATA, + TLS_ST_PENDING_EARLY_DATA_END, + TLS_ST_CW_END_OF_EARLY_DATA, + TLS_ST_SR_END_OF_EARLY_DATA +} OSSL_HANDSHAKE_STATE; + +/* + * Most of the following state values are no longer used and are defined to be + * the closest equivalent value in the current state machine code. Not all + * defines have an equivalent and are set to a dummy value (-1). SSL_ST_CONNECT + * and SSL_ST_ACCEPT are still in use in the definition of SSL_CB_ACCEPT_LOOP, + * SSL_CB_ACCEPT_EXIT, SSL_CB_CONNECT_LOOP and SSL_CB_CONNECT_EXIT. 
+ */ + +# define SSL_ST_CONNECT 0x1000 +# define SSL_ST_ACCEPT 0x2000 + +# define SSL_ST_MASK 0x0FFF + +# define SSL_CB_LOOP 0x01 +# define SSL_CB_EXIT 0x02 +# define SSL_CB_READ 0x04 +# define SSL_CB_WRITE 0x08 +# define SSL_CB_ALERT 0x4000/* used in callback */ +# define SSL_CB_READ_ALERT (SSL_CB_ALERT|SSL_CB_READ) +# define SSL_CB_WRITE_ALERT (SSL_CB_ALERT|SSL_CB_WRITE) +# define SSL_CB_ACCEPT_LOOP (SSL_ST_ACCEPT|SSL_CB_LOOP) +# define SSL_CB_ACCEPT_EXIT (SSL_ST_ACCEPT|SSL_CB_EXIT) +# define SSL_CB_CONNECT_LOOP (SSL_ST_CONNECT|SSL_CB_LOOP) +# define SSL_CB_CONNECT_EXIT (SSL_ST_CONNECT|SSL_CB_EXIT) +# define SSL_CB_HANDSHAKE_START 0x10 +# define SSL_CB_HANDSHAKE_DONE 0x20 + +/* Is the SSL_connection established? */ +# define SSL_in_connect_init(a) (SSL_in_init(a) && !SSL_is_server(a)) +# define SSL_in_accept_init(a) (SSL_in_init(a) && SSL_is_server(a)) +int SSL_in_init(const SSL *s); +int SSL_in_before(const SSL *s); +int SSL_is_init_finished(const SSL *s); + +/* + * The following 3 states are kept in ssl->rlayer.rstate when reads fail, you + * should not need these + */ +# define SSL_ST_READ_HEADER 0xF0 +# define SSL_ST_READ_BODY 0xF1 +# define SSL_ST_READ_DONE 0xF2 + +/*- + * Obtain latest Finished message + * -- that we sent (SSL_get_finished) + * -- that we expected from peer (SSL_get_peer_finished). + * Returns length (0 == no Finished so far), copies up to 'count' bytes. + */ +size_t SSL_get_finished(const SSL *s, void *buf, size_t count); +size_t SSL_get_peer_finished(const SSL *s, void *buf, size_t count); + +/* + * use either SSL_VERIFY_NONE or SSL_VERIFY_PEER, the last 3 options are + * 'ored' with SSL_VERIFY_PEER if they are desired + */ +# define SSL_VERIFY_NONE 0x00 +# define SSL_VERIFY_PEER 0x01 +# define SSL_VERIFY_FAIL_IF_NO_PEER_CERT 0x02 +# define SSL_VERIFY_CLIENT_ONCE 0x04 +# define SSL_VERIFY_POST_HANDSHAKE 0x08 + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define OpenSSL_add_ssl_algorithms() SSL_library_init() +# define SSLeay_add_ssl_algorithms() SSL_library_init() +# endif + +/* More backward compatibility */ +# define SSL_get_cipher(s) \ + SSL_CIPHER_get_name(SSL_get_current_cipher(s)) +# define SSL_get_cipher_bits(s,np) \ + SSL_CIPHER_get_bits(SSL_get_current_cipher(s),np) +# define SSL_get_cipher_version(s) \ + SSL_CIPHER_get_version(SSL_get_current_cipher(s)) +# define SSL_get_cipher_name(s) \ + SSL_CIPHER_get_name(SSL_get_current_cipher(s)) +# define SSL_get_time(a) SSL_SESSION_get_time(a) +# define SSL_set_time(a,b) SSL_SESSION_set_time((a),(b)) +# define SSL_get_timeout(a) SSL_SESSION_get_timeout(a) +# define SSL_set_timeout(a,b) SSL_SESSION_set_timeout((a),(b)) + +# define d2i_SSL_SESSION_bio(bp,s_id) ASN1_d2i_bio_of(SSL_SESSION,SSL_SESSION_new,d2i_SSL_SESSION,bp,s_id) +# define i2d_SSL_SESSION_bio(bp,s_id) ASN1_i2d_bio_of(SSL_SESSION,i2d_SSL_SESSION,bp,s_id) + +DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) +# define SSL_AD_REASON_OFFSET 1000/* offset to get SSL_R_... value + * from SSL_AD_... 
*/ +/* These alert types are for SSLv3 and TLSv1 */ +# define SSL_AD_CLOSE_NOTIFY SSL3_AD_CLOSE_NOTIFY +/* fatal */ +# define SSL_AD_UNEXPECTED_MESSAGE SSL3_AD_UNEXPECTED_MESSAGE +/* fatal */ +# define SSL_AD_BAD_RECORD_MAC SSL3_AD_BAD_RECORD_MAC +# define SSL_AD_DECRYPTION_FAILED TLS1_AD_DECRYPTION_FAILED +# define SSL_AD_RECORD_OVERFLOW TLS1_AD_RECORD_OVERFLOW +/* fatal */ +# define SSL_AD_DECOMPRESSION_FAILURE SSL3_AD_DECOMPRESSION_FAILURE +/* fatal */ +# define SSL_AD_HANDSHAKE_FAILURE SSL3_AD_HANDSHAKE_FAILURE +/* Not for TLS */ +# define SSL_AD_NO_CERTIFICATE SSL3_AD_NO_CERTIFICATE +# define SSL_AD_BAD_CERTIFICATE SSL3_AD_BAD_CERTIFICATE +# define SSL_AD_UNSUPPORTED_CERTIFICATE SSL3_AD_UNSUPPORTED_CERTIFICATE +# define SSL_AD_CERTIFICATE_REVOKED SSL3_AD_CERTIFICATE_REVOKED +# define SSL_AD_CERTIFICATE_EXPIRED SSL3_AD_CERTIFICATE_EXPIRED +# define SSL_AD_CERTIFICATE_UNKNOWN SSL3_AD_CERTIFICATE_UNKNOWN +/* fatal */ +# define SSL_AD_ILLEGAL_PARAMETER SSL3_AD_ILLEGAL_PARAMETER +/* fatal */ +# define SSL_AD_UNKNOWN_CA TLS1_AD_UNKNOWN_CA +/* fatal */ +# define SSL_AD_ACCESS_DENIED TLS1_AD_ACCESS_DENIED +/* fatal */ +# define SSL_AD_DECODE_ERROR TLS1_AD_DECODE_ERROR +# define SSL_AD_DECRYPT_ERROR TLS1_AD_DECRYPT_ERROR +/* fatal */ +# define SSL_AD_EXPORT_RESTRICTION TLS1_AD_EXPORT_RESTRICTION +/* fatal */ +# define SSL_AD_PROTOCOL_VERSION TLS1_AD_PROTOCOL_VERSION +/* fatal */ +# define SSL_AD_INSUFFICIENT_SECURITY TLS1_AD_INSUFFICIENT_SECURITY +/* fatal */ +# define SSL_AD_INTERNAL_ERROR TLS1_AD_INTERNAL_ERROR +# define SSL_AD_USER_CANCELLED TLS1_AD_USER_CANCELLED +# define SSL_AD_NO_RENEGOTIATION TLS1_AD_NO_RENEGOTIATION +# define SSL_AD_MISSING_EXTENSION TLS13_AD_MISSING_EXTENSION +# define SSL_AD_CERTIFICATE_REQUIRED TLS13_AD_CERTIFICATE_REQUIRED +# define SSL_AD_UNSUPPORTED_EXTENSION TLS1_AD_UNSUPPORTED_EXTENSION +# define SSL_AD_CERTIFICATE_UNOBTAINABLE TLS1_AD_CERTIFICATE_UNOBTAINABLE +# define SSL_AD_UNRECOGNIZED_NAME TLS1_AD_UNRECOGNIZED_NAME +# define SSL_AD_BAD_CERTIFICATE_STATUS_RESPONSE TLS1_AD_BAD_CERTIFICATE_STATUS_RESPONSE +# define SSL_AD_BAD_CERTIFICATE_HASH_VALUE TLS1_AD_BAD_CERTIFICATE_HASH_VALUE +/* fatal */ +# define SSL_AD_UNKNOWN_PSK_IDENTITY TLS1_AD_UNKNOWN_PSK_IDENTITY +/* fatal */ +# define SSL_AD_INAPPROPRIATE_FALLBACK TLS1_AD_INAPPROPRIATE_FALLBACK +# define SSL_AD_NO_APPLICATION_PROTOCOL TLS1_AD_NO_APPLICATION_PROTOCOL +# define SSL_ERROR_NONE 0 +# define SSL_ERROR_SSL 1 +# define SSL_ERROR_WANT_READ 2 +# define SSL_ERROR_WANT_WRITE 3 +# define SSL_ERROR_WANT_X509_LOOKUP 4 +# define SSL_ERROR_SYSCALL 5/* look at error stack/return + * value/errno */ +# define SSL_ERROR_ZERO_RETURN 6 +# define SSL_ERROR_WANT_CONNECT 7 +# define SSL_ERROR_WANT_ACCEPT 8 +# define SSL_ERROR_WANT_ASYNC 9 +# define SSL_ERROR_WANT_ASYNC_JOB 10 +# define SSL_ERROR_WANT_CLIENT_HELLO_CB 11 +# define SSL_ERROR_WANT_RETRY_VERIFY 12 + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTRL_SET_TMP_DH 3 +# define SSL_CTRL_SET_TMP_ECDH 4 +# define SSL_CTRL_SET_TMP_DH_CB 6 +# endif + +# define SSL_CTRL_GET_CLIENT_CERT_REQUEST 9 +# define SSL_CTRL_GET_NUM_RENEGOTIATIONS 10 +# define SSL_CTRL_CLEAR_NUM_RENEGOTIATIONS 11 +# define SSL_CTRL_GET_TOTAL_RENEGOTIATIONS 12 +# define SSL_CTRL_GET_FLAGS 13 +# define SSL_CTRL_EXTRA_CHAIN_CERT 14 +# define SSL_CTRL_SET_MSG_CALLBACK 15 +# define SSL_CTRL_SET_MSG_CALLBACK_ARG 16 +/* only applies to datagram connections */ +# define SSL_CTRL_SET_MTU 17 +/* Stats */ +# define SSL_CTRL_SESS_NUMBER 20 +# define SSL_CTRL_SESS_CONNECT 21 +# define 
SSL_CTRL_SESS_CONNECT_GOOD 22 +# define SSL_CTRL_SESS_CONNECT_RENEGOTIATE 23 +# define SSL_CTRL_SESS_ACCEPT 24 +# define SSL_CTRL_SESS_ACCEPT_GOOD 25 +# define SSL_CTRL_SESS_ACCEPT_RENEGOTIATE 26 +# define SSL_CTRL_SESS_HIT 27 +# define SSL_CTRL_SESS_CB_HIT 28 +# define SSL_CTRL_SESS_MISSES 29 +# define SSL_CTRL_SESS_TIMEOUTS 30 +# define SSL_CTRL_SESS_CACHE_FULL 31 +# define SSL_CTRL_MODE 33 +# define SSL_CTRL_GET_READ_AHEAD 40 +# define SSL_CTRL_SET_READ_AHEAD 41 +# define SSL_CTRL_SET_SESS_CACHE_SIZE 42 +# define SSL_CTRL_GET_SESS_CACHE_SIZE 43 +# define SSL_CTRL_SET_SESS_CACHE_MODE 44 +# define SSL_CTRL_GET_SESS_CACHE_MODE 45 +# define SSL_CTRL_GET_MAX_CERT_LIST 50 +# define SSL_CTRL_SET_MAX_CERT_LIST 51 +# define SSL_CTRL_SET_MAX_SEND_FRAGMENT 52 +/* see tls1.h for macros based on these */ +# define SSL_CTRL_SET_TLSEXT_SERVERNAME_CB 53 +# define SSL_CTRL_SET_TLSEXT_SERVERNAME_ARG 54 +# define SSL_CTRL_SET_TLSEXT_HOSTNAME 55 +# define SSL_CTRL_SET_TLSEXT_DEBUG_CB 56 +# define SSL_CTRL_SET_TLSEXT_DEBUG_ARG 57 +# define SSL_CTRL_GET_TLSEXT_TICKET_KEYS 58 +# define SSL_CTRL_SET_TLSEXT_TICKET_KEYS 59 +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT 60 */ +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB 61 */ +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG 62 */ +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB 63 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB_ARG 64 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_TYPE 65 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_EXTS 66 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_EXTS 67 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_IDS 68 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_IDS 69 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_OCSP_RESP 70 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_OCSP_RESP 71 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB 72 +# endif +# define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB 75 +# define SSL_CTRL_SET_SRP_VERIFY_PARAM_CB 76 +# define SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB 77 +# define SSL_CTRL_SET_SRP_ARG 78 +# define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME 79 +# define SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH 80 +# define SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD 81 +# define DTLS_CTRL_GET_TIMEOUT 73 +# define DTLS_CTRL_HANDLE_TIMEOUT 74 +# define SSL_CTRL_GET_RI_SUPPORT 76 +# define SSL_CTRL_CLEAR_MODE 78 +# define SSL_CTRL_SET_NOT_RESUMABLE_SESS_CB 79 +# define SSL_CTRL_GET_EXTRA_CHAIN_CERTS 82 +# define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS 83 +# define SSL_CTRL_CHAIN 88 +# define SSL_CTRL_CHAIN_CERT 89 +# define SSL_CTRL_GET_GROUPS 90 +# define SSL_CTRL_SET_GROUPS 91 +# define SSL_CTRL_SET_GROUPS_LIST 92 +# define SSL_CTRL_GET_SHARED_GROUP 93 +# define SSL_CTRL_SET_SIGALGS 97 +# define SSL_CTRL_SET_SIGALGS_LIST 98 +# define SSL_CTRL_CERT_FLAGS 99 +# define SSL_CTRL_CLEAR_CERT_FLAGS 100 +# define SSL_CTRL_SET_CLIENT_SIGALGS 101 +# define SSL_CTRL_SET_CLIENT_SIGALGS_LIST 102 +# define SSL_CTRL_GET_CLIENT_CERT_TYPES 103 +# define SSL_CTRL_SET_CLIENT_CERT_TYPES 104 +# define SSL_CTRL_BUILD_CERT_CHAIN 105 +# define SSL_CTRL_SET_VERIFY_CERT_STORE 106 +# define SSL_CTRL_SET_CHAIN_CERT_STORE 107 +# define SSL_CTRL_GET_PEER_SIGNATURE_NID 108 +# define SSL_CTRL_GET_PEER_TMP_KEY 109 +# define SSL_CTRL_GET_RAW_CIPHERLIST 110 +# define SSL_CTRL_GET_EC_POINT_FORMATS 111 +# define SSL_CTRL_GET_CHAIN_CERTS 115 +# define SSL_CTRL_SELECT_CURRENT_CERT 116 +# define SSL_CTRL_SET_CURRENT_CERT 117 +# define SSL_CTRL_SET_DH_AUTO 118 +# define DTLS_CTRL_SET_LINK_MTU 120 +# define DTLS_CTRL_GET_LINK_MIN_MTU 121 +# define SSL_CTRL_GET_EXTMS_SUPPORT 122 +# define 
SSL_CTRL_SET_MIN_PROTO_VERSION 123 +# define SSL_CTRL_SET_MAX_PROTO_VERSION 124 +# define SSL_CTRL_SET_SPLIT_SEND_FRAGMENT 125 +# define SSL_CTRL_SET_MAX_PIPELINES 126 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_TYPE 127 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_CB 128 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_CB_ARG 129 +# define SSL_CTRL_GET_MIN_PROTO_VERSION 130 +# define SSL_CTRL_GET_MAX_PROTO_VERSION 131 +# define SSL_CTRL_GET_SIGNATURE_NID 132 +# define SSL_CTRL_GET_TMP_KEY 133 +# define SSL_CTRL_GET_NEGOTIATED_GROUP 134 +# define SSL_CTRL_GET_IANA_GROUPS 135 +# define SSL_CTRL_SET_RETRY_VERIFY 136 +# define SSL_CTRL_GET_VERIFY_CERT_STORE 137 +# define SSL_CTRL_GET_CHAIN_CERT_STORE 138 +# define SSL_CERT_SET_FIRST 1 +# define SSL_CERT_SET_NEXT 2 +# define SSL_CERT_SET_SERVER 3 +# define DTLSv1_get_timeout(ssl, arg) \ + SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)(arg)) +# define DTLSv1_handle_timeout(ssl) \ + SSL_ctrl(ssl,DTLS_CTRL_HANDLE_TIMEOUT,0, NULL) +# define SSL_num_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_NUM_RENEGOTIATIONS,0,NULL) +# define SSL_clear_num_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_CLEAR_NUM_RENEGOTIATIONS,0,NULL) +# define SSL_total_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_TOTAL_RENEGOTIATIONS,0,NULL) +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTX_set_tmp_dh(ctx,dh) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TMP_DH,0,(char *)(dh)) +# endif +# define SSL_CTX_set_dh_auto(ctx, onoff) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_DH_AUTO,onoff,NULL) +# define SSL_set_dh_auto(s, onoff) \ + SSL_ctrl(s,SSL_CTRL_SET_DH_AUTO,onoff,NULL) +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_set_tmp_dh(ssl,dh) \ + SSL_ctrl(ssl,SSL_CTRL_SET_TMP_DH,0,(char *)(dh)) +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTX_set_tmp_ecdh(ctx,ecdh) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TMP_ECDH,0,(char *)(ecdh)) +# define SSL_set_tmp_ecdh(ssl,ecdh) \ + SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)(ecdh)) +# endif +# define SSL_CTX_add_extra_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)(x509)) +# define SSL_CTX_get_extra_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,0,px509) +# define SSL_CTX_get_extra_chain_certs_only(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,1,px509) +# define SSL_CTX_clear_extra_chain_certs(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL) +# define SSL_CTX_set0_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)(sk)) +# define SSL_CTX_set1_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,1,(char *)(sk)) +# define SSL_CTX_add0_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,0,(char *)(x509)) +# define SSL_CTX_add1_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,1,(char *)(x509)) +# define SSL_CTX_get0_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_CTX_clear_chain_certs(ctx) \ + SSL_CTX_set0_chain(ctx,NULL) +# define SSL_CTX_build_cert_chain(ctx, flags) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_CTX_select_current_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)(x509)) +# define SSL_CTX_set_current_cert(ctx, op) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_CTX_set0_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set1_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)(st)) +# define 
SSL_CTX_get0_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set0_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set1_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)(st)) +# define SSL_CTX_get0_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_set0_chain(s,sk) \ + SSL_ctrl(s,SSL_CTRL_CHAIN,0,(char *)(sk)) +# define SSL_set1_chain(s,sk) \ + SSL_ctrl(s,SSL_CTRL_CHAIN,1,(char *)(sk)) +# define SSL_add0_chain_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_CHAIN_CERT,0,(char *)(x509)) +# define SSL_add1_chain_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_CHAIN_CERT,1,(char *)(x509)) +# define SSL_get0_chain_certs(s,px509) \ + SSL_ctrl(s,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_clear_chain_certs(s) \ + SSL_set0_chain(s,NULL) +# define SSL_build_cert_chain(s, flags) \ + SSL_ctrl(s,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_select_current_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)(x509)) +# define SSL_set_current_cert(s,op) \ + SSL_ctrl(s,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_set0_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_set1_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)(st)) +#define SSL_get0_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_GET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_set0_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_set1_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)(st)) +#define SSL_get0_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_GET_CHAIN_CERT_STORE,0,(char *)(st)) + +# define SSL_get1_groups(s, glist) \ + SSL_ctrl(s,SSL_CTRL_GET_GROUPS,0,(int*)(glist)) +# define SSL_get0_iana_groups(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_IANA_GROUPS,0,(uint16_t **)(plst)) +# define SSL_CTX_set1_groups(ctx, glist, glistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS,glistlen,(int *)(glist)) +# define SSL_CTX_set1_groups_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(s)) +# define SSL_set1_groups(s, glist, glistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_GROUPS,glistlen,(char *)(glist)) +# define SSL_set1_groups_list(s, str) \ + SSL_ctrl(s,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(str)) +# define SSL_get_shared_group(s, n) \ + SSL_ctrl(s,SSL_CTRL_GET_SHARED_GROUP,n,NULL) +# define SSL_get_negotiated_group(s) \ + SSL_ctrl(s,SSL_CTRL_GET_NEGOTIATED_GROUP,0,NULL) +# define SSL_CTX_set1_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS,slistlen,(int *)(slist)) +# define SSL_CTX_set1_sigalgs_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)(s)) +# define SSL_set1_sigalgs(s, slist, slistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_SIGALGS,slistlen,(int *)(slist)) +# define SSL_set1_sigalgs_list(s, str) \ + SSL_ctrl(s,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)(str)) +# define SSL_CTX_set1_client_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)(slist)) +# define SSL_CTX_set1_client_sigalgs_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)(s)) +# define SSL_set1_client_sigalgs(s, slist, slistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)(slist)) +# define SSL_set1_client_sigalgs_list(s, str) \ + 
SSL_ctrl(s,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)(str)) +# define SSL_get0_certificate_types(s, clist) \ + SSL_ctrl(s, SSL_CTRL_GET_CLIENT_CERT_TYPES, 0, (char *)(clist)) +# define SSL_CTX_set1_client_certificate_types(ctx, clist, clistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen, \ + (char *)(clist)) +# define SSL_set1_client_certificate_types(s, clist, clistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)(clist)) +# define SSL_get_signature_nid(s, pn) \ + SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NID,0,pn) +# define SSL_get_peer_signature_nid(s, pn) \ + SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn) +# define SSL_get_peer_tmp_key(s, pk) \ + SSL_ctrl(s,SSL_CTRL_GET_PEER_TMP_KEY,0,pk) +# define SSL_get_tmp_key(s, pk) \ + SSL_ctrl(s,SSL_CTRL_GET_TMP_KEY,0,pk) +# define SSL_get0_raw_cipherlist(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_RAW_CIPHERLIST,0,plst) +# define SSL_get0_ec_point_formats(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_EC_POINT_FORMATS,0,plst) +# define SSL_CTX_set_min_proto_version(ctx, version) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_SET_MIN_PROTO_VERSION, version, NULL) +# define SSL_CTX_set_max_proto_version(ctx, version) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_SET_MAX_PROTO_VERSION, version, NULL) +# define SSL_CTX_get_min_proto_version(ctx) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_GET_MIN_PROTO_VERSION, 0, NULL) +# define SSL_CTX_get_max_proto_version(ctx) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_GET_MAX_PROTO_VERSION, 0, NULL) +# define SSL_set_min_proto_version(s, version) \ + SSL_ctrl(s, SSL_CTRL_SET_MIN_PROTO_VERSION, version, NULL) +# define SSL_set_max_proto_version(s, version) \ + SSL_ctrl(s, SSL_CTRL_SET_MAX_PROTO_VERSION, version, NULL) +# define SSL_get_min_proto_version(s) \ + SSL_ctrl(s, SSL_CTRL_GET_MIN_PROTO_VERSION, 0, NULL) +# define SSL_get_max_proto_version(s) \ + SSL_ctrl(s, SSL_CTRL_GET_MAX_PROTO_VERSION, 0, NULL) + +const char *SSL_get0_group_name(SSL *s); +const char *SSL_group_to_name(SSL *s, int id); + +/* Backwards compatibility, original 1.1.0 names */ +# define SSL_CTRL_GET_SERVER_TMP_KEY \ + SSL_CTRL_GET_PEER_TMP_KEY +# define SSL_get_server_tmp_key(s, pk) \ + SSL_get_peer_tmp_key(s, pk) + +int SSL_set0_tmp_dh_pkey(SSL *s, EVP_PKEY *dhpkey); +int SSL_CTX_set0_tmp_dh_pkey(SSL_CTX *ctx, EVP_PKEY *dhpkey); + +/* + * The following symbol names are old and obsolete. They are kept + * for compatibility reasons only and should not be used anymore. + */ +# define SSL_CTRL_GET_CURVES SSL_CTRL_GET_GROUPS +# define SSL_CTRL_SET_CURVES SSL_CTRL_SET_GROUPS +# define SSL_CTRL_SET_CURVES_LIST SSL_CTRL_SET_GROUPS_LIST +# define SSL_CTRL_GET_SHARED_CURVE SSL_CTRL_GET_SHARED_GROUP + +# define SSL_get1_curves SSL_get1_groups +# define SSL_CTX_set1_curves SSL_CTX_set1_groups +# define SSL_CTX_set1_curves_list SSL_CTX_set1_groups_list +# define SSL_set1_curves SSL_set1_groups +# define SSL_set1_curves_list SSL_set1_groups_list +# define SSL_get_shared_curve SSL_get_shared_group + + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +/* Provide some compatibility macros for removed functionality. */ +# define SSL_CTX_need_tmp_RSA(ctx) 0 +# define SSL_CTX_set_tmp_rsa(ctx,rsa) 1 +# define SSL_need_tmp_RSA(ssl) 0 +# define SSL_set_tmp_rsa(ssl,rsa) 1 +# define SSL_CTX_set_ecdh_auto(dummy, onoff) ((onoff) != 0) +# define SSL_set_ecdh_auto(dummy, onoff) ((onoff) != 0) +/* + * We "pretend" to call the callback to avoid warnings about unused static + * functions. 
+ */ +# define SSL_CTX_set_tmp_rsa_callback(ctx, cb) while(0) (cb)(NULL, 0, 0) +# define SSL_set_tmp_rsa_callback(ssl, cb) while(0) (cb)(NULL, 0, 0) +# endif +__owur const BIO_METHOD *BIO_f_ssl(void); +__owur BIO *BIO_new_ssl(SSL_CTX *ctx, int client); +__owur BIO *BIO_new_ssl_connect(SSL_CTX *ctx); +__owur BIO *BIO_new_buffer_ssl_connect(SSL_CTX *ctx); +__owur int BIO_ssl_copy_session_id(BIO *to, BIO *from); +void BIO_ssl_shutdown(BIO *ssl_bio); + +__owur int SSL_CTX_set_cipher_list(SSL_CTX *, const char *str); +__owur SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth); +__owur SSL_CTX *SSL_CTX_new_ex(OSSL_LIB_CTX *libctx, const char *propq, + const SSL_METHOD *meth); +int SSL_CTX_up_ref(SSL_CTX *ctx); +void SSL_CTX_free(SSL_CTX *); +__owur long SSL_CTX_set_timeout(SSL_CTX *ctx, long t); +__owur long SSL_CTX_get_timeout(const SSL_CTX *ctx); +__owur X509_STORE *SSL_CTX_get_cert_store(const SSL_CTX *); +void SSL_CTX_set_cert_store(SSL_CTX *, X509_STORE *); +void SSL_CTX_set1_cert_store(SSL_CTX *, X509_STORE *); +__owur int SSL_want(const SSL *s); +__owur int SSL_clear(SSL *s); + +void SSL_CTX_flush_sessions(SSL_CTX *ctx, long tm); + +__owur const SSL_CIPHER *SSL_get_current_cipher(const SSL *s); +__owur const SSL_CIPHER *SSL_get_pending_cipher(const SSL *s); +__owur int SSL_CIPHER_get_bits(const SSL_CIPHER *c, int *alg_bits); +__owur const char *SSL_CIPHER_get_version(const SSL_CIPHER *c); +__owur const char *SSL_CIPHER_get_name(const SSL_CIPHER *c); +__owur const char *SSL_CIPHER_standard_name(const SSL_CIPHER *c); +__owur const char *OPENSSL_cipher_name(const char *rfc_name); +__owur uint32_t SSL_CIPHER_get_id(const SSL_CIPHER *c); +__owur uint16_t SSL_CIPHER_get_protocol_id(const SSL_CIPHER *c); +__owur int SSL_CIPHER_get_kx_nid(const SSL_CIPHER *c); +__owur int SSL_CIPHER_get_auth_nid(const SSL_CIPHER *c); +__owur const EVP_MD *SSL_CIPHER_get_handshake_digest(const SSL_CIPHER *c); +__owur int SSL_CIPHER_is_aead(const SSL_CIPHER *c); + +__owur int SSL_get_fd(const SSL *s); +__owur int SSL_get_rfd(const SSL *s); +__owur int SSL_get_wfd(const SSL *s); +__owur const char *SSL_get_cipher_list(const SSL *s, int n); +__owur char *SSL_get_shared_ciphers(const SSL *s, char *buf, int size); +__owur int SSL_get_read_ahead(const SSL *s); +__owur int SSL_pending(const SSL *s); +__owur int SSL_has_pending(const SSL *s); +# ifndef OPENSSL_NO_SOCK +__owur int SSL_set_fd(SSL *s, int fd); +__owur int SSL_set_rfd(SSL *s, int fd); +__owur int SSL_set_wfd(SSL *s, int fd); +# endif +void SSL_set0_rbio(SSL *s, BIO *rbio); +void SSL_set0_wbio(SSL *s, BIO *wbio); +void SSL_set_bio(SSL *s, BIO *rbio, BIO *wbio); +__owur BIO *SSL_get_rbio(const SSL *s); +__owur BIO *SSL_get_wbio(const SSL *s); +__owur int SSL_set_cipher_list(SSL *s, const char *str); +__owur int SSL_CTX_set_ciphersuites(SSL_CTX *ctx, const char *str); +__owur int SSL_set_ciphersuites(SSL *s, const char *str); +void SSL_set_read_ahead(SSL *s, int yes); +__owur int SSL_get_verify_mode(const SSL *s); +__owur int SSL_get_verify_depth(const SSL *s); +__owur SSL_verify_cb SSL_get_verify_callback(const SSL *s); +void SSL_set_verify(SSL *s, int mode, SSL_verify_cb callback); +void SSL_set_verify_depth(SSL *s, int depth); +void SSL_set_cert_cb(SSL *s, int (*cb) (SSL *ssl, void *arg), void *arg); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 __owur int SSL_use_RSAPrivateKey(SSL *ssl, RSA *rsa); +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_use_RSAPrivateKey_ASN1(SSL *ssl, + const unsigned char *d, long len); +# endif +__owur int SSL_use_PrivateKey(SSL 
*ssl, EVP_PKEY *pkey); +__owur int SSL_use_PrivateKey_ASN1(int pk, SSL *ssl, const unsigned char *d, + long len); +__owur int SSL_use_certificate(SSL *ssl, X509 *x); +__owur int SSL_use_certificate_ASN1(SSL *ssl, const unsigned char *d, int len); +__owur int SSL_use_cert_and_key(SSL *ssl, X509 *x509, EVP_PKEY *privatekey, + STACK_OF(X509) *chain, int override); + + +/* serverinfo file format versions */ +# define SSL_SERVERINFOV1 1 +# define SSL_SERVERINFOV2 2 + +/* Set serverinfo data for the current active cert. */ +__owur int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, + size_t serverinfo_length); +__owur int SSL_CTX_use_serverinfo_ex(SSL_CTX *ctx, unsigned int version, + const unsigned char *serverinfo, + size_t serverinfo_length); +__owur int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_use_RSAPrivateKey_file(SSL *ssl, const char *file, int type); +#endif + +__owur int SSL_use_PrivateKey_file(SSL *ssl, const char *file, int type); +__owur int SSL_use_certificate_file(SSL *ssl, const char *file, int type); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey_file(SSL_CTX *ctx, const char *file, + int type); +#endif +__owur int SSL_CTX_use_PrivateKey_file(SSL_CTX *ctx, const char *file, + int type); +__owur int SSL_CTX_use_certificate_file(SSL_CTX *ctx, const char *file, + int type); +/* PEM type */ +__owur int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file); +__owur int SSL_use_certificate_chain_file(SSL *ssl, const char *file); +__owur STACK_OF(X509_NAME) *SSL_load_client_CA_file(const char *file); +__owur STACK_OF(X509_NAME) +*SSL_load_client_CA_file_ex(const char *file, OSSL_LIB_CTX *libctx, + const char *propq); +__owur int SSL_add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *file); +int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *dir); +int SSL_add_store_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *uri); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_load_error_strings() \ + OPENSSL_init_ssl(OPENSSL_INIT_LOAD_SSL_STRINGS \ + | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL) +# endif + +__owur const char *SSL_state_string(const SSL *s); +__owur const char *SSL_rstate_string(const SSL *s); +__owur const char *SSL_state_string_long(const SSL *s); +__owur const char *SSL_rstate_string_long(const SSL *s); +__owur long SSL_SESSION_get_time(const SSL_SESSION *s); +__owur long SSL_SESSION_set_time(SSL_SESSION *s, long t); +__owur long SSL_SESSION_get_timeout(const SSL_SESSION *s); +__owur long SSL_SESSION_set_timeout(SSL_SESSION *s, long t); +__owur int SSL_SESSION_get_protocol_version(const SSL_SESSION *s); +__owur int SSL_SESSION_set_protocol_version(SSL_SESSION *s, int version); + +__owur const char *SSL_SESSION_get0_hostname(const SSL_SESSION *s); +__owur int SSL_SESSION_set1_hostname(SSL_SESSION *s, const char *hostname); +void SSL_SESSION_get0_alpn_selected(const SSL_SESSION *s, + const unsigned char **alpn, + size_t *len); +__owur int SSL_SESSION_set1_alpn_selected(SSL_SESSION *s, + const unsigned char *alpn, + size_t len); +__owur const SSL_CIPHER *SSL_SESSION_get0_cipher(const SSL_SESSION *s); +__owur int SSL_SESSION_set_cipher(SSL_SESSION *s, const SSL_CIPHER *cipher); +__owur int SSL_SESSION_has_ticket(const SSL_SESSION *s); +__owur unsigned long SSL_SESSION_get_ticket_lifetime_hint(const SSL_SESSION *s); +void 
SSL_SESSION_get0_ticket(const SSL_SESSION *s, const unsigned char **tick, + size_t *len); +__owur uint32_t SSL_SESSION_get_max_early_data(const SSL_SESSION *s); +__owur int SSL_SESSION_set_max_early_data(SSL_SESSION *s, + uint32_t max_early_data); +__owur int SSL_copy_session_id(SSL *to, const SSL *from); +__owur X509 *SSL_SESSION_get0_peer(SSL_SESSION *s); +__owur int SSL_SESSION_set1_id_context(SSL_SESSION *s, + const unsigned char *sid_ctx, + unsigned int sid_ctx_len); +__owur int SSL_SESSION_set1_id(SSL_SESSION *s, const unsigned char *sid, + unsigned int sid_len); +__owur int SSL_SESSION_is_resumable(const SSL_SESSION *s); + +__owur SSL_SESSION *SSL_SESSION_new(void); +__owur SSL_SESSION *SSL_SESSION_dup(const SSL_SESSION *src); +const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, + unsigned int *len); +const unsigned char *SSL_SESSION_get0_id_context(const SSL_SESSION *s, + unsigned int *len); +__owur unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s); +# ifndef OPENSSL_NO_STDIO +int SSL_SESSION_print_fp(FILE *fp, const SSL_SESSION *ses); +# endif +int SSL_SESSION_print(BIO *fp, const SSL_SESSION *ses); +int SSL_SESSION_print_keylog(BIO *bp, const SSL_SESSION *x); +int SSL_SESSION_up_ref(SSL_SESSION *ses); +void SSL_SESSION_free(SSL_SESSION *ses); +__owur int i2d_SSL_SESSION(const SSL_SESSION *in, unsigned char **pp); +__owur int SSL_set_session(SSL *to, SSL_SESSION *session); +int SSL_CTX_add_session(SSL_CTX *ctx, SSL_SESSION *session); +int SSL_CTX_remove_session(SSL_CTX *ctx, SSL_SESSION *session); +__owur int SSL_CTX_set_generate_session_id(SSL_CTX *ctx, GEN_SESSION_CB cb); +__owur int SSL_set_generate_session_id(SSL *s, GEN_SESSION_CB cb); +__owur int SSL_has_matching_session_id(const SSL *s, + const unsigned char *id, + unsigned int id_len); +SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, + long length); +SSL_SESSION *d2i_SSL_SESSION_ex(SSL_SESSION **a, const unsigned char **pp, + long length, OSSL_LIB_CTX *libctx, + const char *propq); + +# ifdef OPENSSL_X509_H +__owur X509 *SSL_get0_peer_certificate(const SSL *s); +__owur X509 *SSL_get1_peer_certificate(const SSL *s); +/* Deprecated in 3.0.0 */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_get_peer_certificate SSL_get1_peer_certificate +# endif +# endif + +__owur STACK_OF(X509) *SSL_get_peer_cert_chain(const SSL *s); + +__owur int SSL_CTX_get_verify_mode(const SSL_CTX *ctx); +__owur int SSL_CTX_get_verify_depth(const SSL_CTX *ctx); +__owur SSL_verify_cb SSL_CTX_get_verify_callback(const SSL_CTX *ctx); +void SSL_CTX_set_verify(SSL_CTX *ctx, int mode, SSL_verify_cb callback); +void SSL_CTX_set_verify_depth(SSL_CTX *ctx, int depth); +void SSL_CTX_set_cert_verify_callback(SSL_CTX *ctx, + int (*cb) (X509_STORE_CTX *, void *), + void *arg); +void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cb) (SSL *ssl, void *arg), + void *arg); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey(SSL_CTX *ctx, RSA *rsa); +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey_ASN1(SSL_CTX *ctx, const unsigned char *d, + long len); +# endif +__owur int SSL_CTX_use_PrivateKey(SSL_CTX *ctx, EVP_PKEY *pkey); +__owur int SSL_CTX_use_PrivateKey_ASN1(int pk, SSL_CTX *ctx, + const unsigned char *d, long len); +__owur int SSL_CTX_use_certificate(SSL_CTX *ctx, X509 *x); +__owur int SSL_CTX_use_certificate_ASN1(SSL_CTX *ctx, int len, + const unsigned char *d); +__owur int SSL_CTX_use_cert_and_key(SSL_CTX *ctx, X509 *x509, EVP_PKEY *privatekey, + STACK_OF(X509) 
*chain, int override); + +void SSL_CTX_set_default_passwd_cb(SSL_CTX *ctx, pem_password_cb *cb); +void SSL_CTX_set_default_passwd_cb_userdata(SSL_CTX *ctx, void *u); +pem_password_cb *SSL_CTX_get_default_passwd_cb(SSL_CTX *ctx); +void *SSL_CTX_get_default_passwd_cb_userdata(SSL_CTX *ctx); +void SSL_set_default_passwd_cb(SSL *s, pem_password_cb *cb); +void SSL_set_default_passwd_cb_userdata(SSL *s, void *u); +pem_password_cb *SSL_get_default_passwd_cb(SSL *s); +void *SSL_get_default_passwd_cb_userdata(SSL *s); + +__owur int SSL_CTX_check_private_key(const SSL_CTX *ctx); +__owur int SSL_check_private_key(const SSL *ctx); + +__owur int SSL_CTX_set_session_id_context(SSL_CTX *ctx, + const unsigned char *sid_ctx, + unsigned int sid_ctx_len); + +SSL *SSL_new(SSL_CTX *ctx); +int SSL_up_ref(SSL *s); +int SSL_is_dtls(const SSL *s); +int SSL_is_tls(const SSL *s); +int SSL_is_quic(const SSL *s); +__owur int SSL_set_session_id_context(SSL *ssl, const unsigned char *sid_ctx, + unsigned int sid_ctx_len); + +__owur int SSL_CTX_set_purpose(SSL_CTX *ctx, int purpose); +__owur int SSL_set_purpose(SSL *ssl, int purpose); +__owur int SSL_CTX_set_trust(SSL_CTX *ctx, int trust); +__owur int SSL_set_trust(SSL *ssl, int trust); + +__owur int SSL_set1_host(SSL *s, const char *hostname); +__owur int SSL_add1_host(SSL *s, const char *hostname); +__owur const char *SSL_get0_peername(SSL *s); +void SSL_set_hostflags(SSL *s, unsigned int flags); + +__owur int SSL_CTX_dane_enable(SSL_CTX *ctx); +__owur int SSL_CTX_dane_mtype_set(SSL_CTX *ctx, const EVP_MD *md, + uint8_t mtype, uint8_t ord); +__owur int SSL_dane_enable(SSL *s, const char *basedomain); +__owur int SSL_dane_tlsa_add(SSL *s, uint8_t usage, uint8_t selector, + uint8_t mtype, const unsigned char *data, size_t dlen); +__owur int SSL_get0_dane_authority(SSL *s, X509 **mcert, EVP_PKEY **mspki); +__owur int SSL_get0_dane_tlsa(SSL *s, uint8_t *usage, uint8_t *selector, + uint8_t *mtype, const unsigned char **data, + size_t *dlen); +/* + * Bridge opacity barrier between libcrypt and libssl, also needed to support + * offline testing in test/danetest.c + */ +SSL_DANE *SSL_get0_dane(SSL *ssl); +/* + * DANE flags + */ +unsigned long SSL_CTX_dane_set_flags(SSL_CTX *ctx, unsigned long flags); +unsigned long SSL_CTX_dane_clear_flags(SSL_CTX *ctx, unsigned long flags); +unsigned long SSL_dane_set_flags(SSL *ssl, unsigned long flags); +unsigned long SSL_dane_clear_flags(SSL *ssl, unsigned long flags); + +__owur int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm); +__owur int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm); + +__owur X509_VERIFY_PARAM *SSL_CTX_get0_param(SSL_CTX *ctx); +__owur X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl); + +# ifndef OPENSSL_NO_SRP +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_username(SSL_CTX *ctx, char *name); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_password(SSL_CTX *ctx, char *password); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_strength(SSL_CTX *ctx, int strength); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_client_pwd_callback(SSL_CTX *ctx, + char *(*cb) (SSL *, void *)); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_verify_param_callback(SSL_CTX *ctx, + int (*cb) (SSL *, void *)); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_username_callback(SSL_CTX *ctx, + int (*cb) (SSL *, int *, void *)); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_cb_arg(SSL_CTX *ctx, void *arg); + +OSSL_DEPRECATEDIN_3_0 +int SSL_set_srp_server_param(SSL *s, const BIGNUM *N, const BIGNUM *g, + BIGNUM *sa, BIGNUM *v, char 
*info); +OSSL_DEPRECATEDIN_3_0 +int SSL_set_srp_server_param_pw(SSL *s, const char *user, const char *pass, + const char *grp); + +OSSL_DEPRECATEDIN_3_0 __owur BIGNUM *SSL_get_srp_g(SSL *s); +OSSL_DEPRECATEDIN_3_0 __owur BIGNUM *SSL_get_srp_N(SSL *s); + +OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_username(SSL *s); +OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_userinfo(SSL *s); +# endif +# endif + +/* + * ClientHello callback and helpers. + */ + +# define SSL_CLIENT_HELLO_SUCCESS 1 +# define SSL_CLIENT_HELLO_ERROR 0 +# define SSL_CLIENT_HELLO_RETRY (-1) + +typedef int (*SSL_client_hello_cb_fn) (SSL *s, int *al, void *arg); +void SSL_CTX_set_client_hello_cb(SSL_CTX *c, SSL_client_hello_cb_fn cb, + void *arg); +int SSL_client_hello_isv2(SSL *s); +unsigned int SSL_client_hello_get0_legacy_version(SSL *s); +size_t SSL_client_hello_get0_random(SSL *s, const unsigned char **out); +size_t SSL_client_hello_get0_session_id(SSL *s, const unsigned char **out); +size_t SSL_client_hello_get0_ciphers(SSL *s, const unsigned char **out); +size_t SSL_client_hello_get0_compression_methods(SSL *s, + const unsigned char **out); +int SSL_client_hello_get1_extensions_present(SSL *s, int **out, size_t *outlen); +int SSL_client_hello_get_extension_order(SSL *s, uint16_t *exts, + size_t *num_exts); +int SSL_client_hello_get0_ext(SSL *s, unsigned int type, + const unsigned char **out, size_t *outlen); + +void SSL_certs_clear(SSL *s); +void SSL_free(SSL *ssl); +# ifdef OSSL_ASYNC_FD +/* + * Windows application developer has to include windows.h to use these. + */ +__owur int SSL_waiting_for_async(SSL *s); +__owur int SSL_get_all_async_fds(SSL *s, OSSL_ASYNC_FD *fds, size_t *numfds); +__owur int SSL_get_changed_async_fds(SSL *s, OSSL_ASYNC_FD *addfd, + size_t *numaddfds, OSSL_ASYNC_FD *delfd, + size_t *numdelfds); +__owur int SSL_CTX_set_async_callback(SSL_CTX *ctx, SSL_async_callback_fn callback); +__owur int SSL_CTX_set_async_callback_arg(SSL_CTX *ctx, void *arg); +__owur int SSL_set_async_callback(SSL *s, SSL_async_callback_fn callback); +__owur int SSL_set_async_callback_arg(SSL *s, void *arg); +__owur int SSL_get_async_status(SSL *s, int *status); + +# endif +__owur int SSL_accept(SSL *ssl); +__owur int SSL_stateless(SSL *s); +__owur int SSL_connect(SSL *ssl); +__owur int SSL_read(SSL *ssl, void *buf, int num); +__owur int SSL_read_ex(SSL *ssl, void *buf, size_t num, size_t *readbytes); + +# define SSL_READ_EARLY_DATA_ERROR 0 +# define SSL_READ_EARLY_DATA_SUCCESS 1 +# define SSL_READ_EARLY_DATA_FINISH 2 + +__owur int SSL_read_early_data(SSL *s, void *buf, size_t num, + size_t *readbytes); +__owur int SSL_peek(SSL *ssl, void *buf, int num); +__owur int SSL_peek_ex(SSL *ssl, void *buf, size_t num, size_t *readbytes); +__owur ossl_ssize_t SSL_sendfile(SSL *s, int fd, off_t offset, size_t size, + int flags); +__owur int SSL_write(SSL *ssl, const void *buf, int num); +__owur int SSL_write_ex(SSL *s, const void *buf, size_t num, size_t *written); +__owur int SSL_write_early_data(SSL *s, const void *buf, size_t num, + size_t *written); +long SSL_ctrl(SSL *ssl, int cmd, long larg, void *parg); +long SSL_callback_ctrl(SSL *, int, void (*)(void)); +long SSL_CTX_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg); +long SSL_CTX_callback_ctrl(SSL_CTX *, int, void (*)(void)); + +# define SSL_EARLY_DATA_NOT_SENT 0 +# define SSL_EARLY_DATA_REJECTED 1 +# define SSL_EARLY_DATA_ACCEPTED 2 + +__owur int SSL_get_early_data_status(const SSL *s); + +__owur int SSL_get_error(const SSL *s, int ret_code); +__owur const char 
*SSL_get_version(const SSL *s); +__owur int SSL_get_handshake_rtt(const SSL *s, uint64_t *rtt); + +/* This sets the 'default' SSL version that SSL_new() will create */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_set_ssl_version(SSL_CTX *ctx, const SSL_METHOD *meth); +# endif + +# ifndef OPENSSL_NO_SSL3_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_method(void); /* SSLv3 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_client_method(void); +# endif +# endif + +#define SSLv23_method TLS_method +#define SSLv23_server_method TLS_server_method +#define SSLv23_client_method TLS_client_method + +/* Negotiate highest available SSL/TLS version */ +__owur const SSL_METHOD *TLS_method(void); +__owur const SSL_METHOD *TLS_server_method(void); +__owur const SSL_METHOD *TLS_client_method(void); + +# ifndef OPENSSL_NO_TLS1_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_method(void); /* TLSv1.0 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_TLS1_1_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_method(void); /* TLSv1.1 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_TLS1_2_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_method(void); /* TLSv1.2 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_DTLS1_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_method(void); /* DTLSv1.0 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_DTLS1_2_METHOD +/* DTLSv1.2 */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_client_method(void); +# endif +# endif + +__owur const SSL_METHOD *DTLS_method(void); /* DTLS 1.0 and 1.2 */ +__owur const SSL_METHOD *DTLS_server_method(void); /* DTLS 1.0 and 1.2 */ +__owur const SSL_METHOD *DTLS_client_method(void); /* DTLS 1.0 and 1.2 */ + +__owur size_t DTLS_get_data_mtu(const SSL *s); + +__owur STACK_OF(SSL_CIPHER) *SSL_get_ciphers(const SSL *s); +__owur STACK_OF(SSL_CIPHER) *SSL_CTX_get_ciphers(const SSL_CTX *ctx); +__owur STACK_OF(SSL_CIPHER) *SSL_get_client_ciphers(const SSL *s); +__owur STACK_OF(SSL_CIPHER) *SSL_get1_supported_ciphers(SSL *s); + +__owur int SSL_do_handshake(SSL *s); +int SSL_key_update(SSL *s, int updatetype); +int SSL_get_key_update_type(const SSL *s); +int SSL_renegotiate(SSL *s); +int SSL_renegotiate_abbreviated(SSL *s); +__owur int SSL_renegotiate_pending(const SSL *s); +int SSL_new_session_ticket(SSL *s); +int SSL_shutdown(SSL *s); +__owur int SSL_verify_client_post_handshake(SSL 
*s); +void SSL_CTX_set_post_handshake_auth(SSL_CTX *ctx, int val); +void SSL_set_post_handshake_auth(SSL *s, int val); + +__owur const SSL_METHOD *SSL_CTX_get_ssl_method(const SSL_CTX *ctx); +__owur const SSL_METHOD *SSL_get_ssl_method(const SSL *s); +__owur int SSL_set_ssl_method(SSL *s, const SSL_METHOD *method); +__owur const char *SSL_alert_type_string_long(int value); +__owur const char *SSL_alert_type_string(int value); +__owur const char *SSL_alert_desc_string_long(int value); +__owur const char *SSL_alert_desc_string(int value); + +void SSL_set0_CA_list(SSL *s, STACK_OF(X509_NAME) *name_list); +void SSL_CTX_set0_CA_list(SSL_CTX *ctx, STACK_OF(X509_NAME) *name_list); +__owur const STACK_OF(X509_NAME) *SSL_get0_CA_list(const SSL *s); +__owur const STACK_OF(X509_NAME) *SSL_CTX_get0_CA_list(const SSL_CTX *ctx); +__owur int SSL_add1_to_CA_list(SSL *ssl, const X509 *x); +__owur int SSL_CTX_add1_to_CA_list(SSL_CTX *ctx, const X509 *x); +__owur const STACK_OF(X509_NAME) *SSL_get0_peer_CA_list(const SSL *s); + +void SSL_set_client_CA_list(SSL *s, STACK_OF(X509_NAME) *name_list); +void SSL_CTX_set_client_CA_list(SSL_CTX *ctx, STACK_OF(X509_NAME) *name_list); +__owur STACK_OF(X509_NAME) *SSL_get_client_CA_list(const SSL *s); +__owur STACK_OF(X509_NAME) *SSL_CTX_get_client_CA_list(const SSL_CTX *s); +__owur int SSL_add_client_CA(SSL *ssl, X509 *x); +__owur int SSL_CTX_add_client_CA(SSL_CTX *ctx, X509 *x); + +void SSL_set_connect_state(SSL *s); +void SSL_set_accept_state(SSL *s); + +__owur long SSL_get_default_timeout(const SSL *s); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_library_init() OPENSSL_init_ssl(0, NULL) +# endif + +__owur char *SSL_CIPHER_description(const SSL_CIPHER *, char *buf, int size); +__owur STACK_OF(X509_NAME) *SSL_dup_CA_list(const STACK_OF(X509_NAME) *sk); + +__owur SSL *SSL_dup(SSL *ssl); + +__owur X509 *SSL_get_certificate(const SSL *ssl); +/* + * EVP_PKEY + */ +struct evp_pkey_st *SSL_get_privatekey(const SSL *ssl); + +__owur X509 *SSL_CTX_get0_certificate(const SSL_CTX *ctx); +__owur EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx); + +void SSL_CTX_set_quiet_shutdown(SSL_CTX *ctx, int mode); +__owur int SSL_CTX_get_quiet_shutdown(const SSL_CTX *ctx); +void SSL_set_quiet_shutdown(SSL *ssl, int mode); +__owur int SSL_get_quiet_shutdown(const SSL *ssl); +void SSL_set_shutdown(SSL *ssl, int mode); +__owur int SSL_get_shutdown(const SSL *ssl); +__owur int SSL_version(const SSL *ssl); +__owur int SSL_client_version(const SSL *s); +__owur int SSL_CTX_set_default_verify_paths(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_dir(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_file(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_store(SSL_CTX *ctx); +__owur int SSL_CTX_load_verify_file(SSL_CTX *ctx, const char *CAfile); +__owur int SSL_CTX_load_verify_dir(SSL_CTX *ctx, const char *CApath); +__owur int SSL_CTX_load_verify_store(SSL_CTX *ctx, const char *CAstore); +__owur int SSL_CTX_load_verify_locations(SSL_CTX *ctx, + const char *CAfile, + const char *CApath); +# define SSL_get0_session SSL_get_session/* just peek at pointer */ +__owur SSL_SESSION *SSL_get_session(const SSL *ssl); +__owur SSL_SESSION *SSL_get1_session(SSL *ssl); /* obtain a reference count */ +__owur SSL_CTX *SSL_get_SSL_CTX(const SSL *ssl); +SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX *ctx); +void SSL_set_info_callback(SSL *ssl, + void (*cb) (const SSL *ssl, int type, int val)); +void (*SSL_get_info_callback(const SSL *ssl)) (const SSL *ssl, int type, + int val); +__owur 
OSSL_HANDSHAKE_STATE SSL_get_state(const SSL *ssl); + +void SSL_set_verify_result(SSL *ssl, long v); +__owur long SSL_get_verify_result(const SSL *ssl); +__owur STACK_OF(X509) *SSL_get0_verified_chain(const SSL *s); + +__owur size_t SSL_get_client_random(const SSL *ssl, unsigned char *out, + size_t outlen); +__owur size_t SSL_get_server_random(const SSL *ssl, unsigned char *out, + size_t outlen); +__owur size_t SSL_SESSION_get_master_key(const SSL_SESSION *sess, + unsigned char *out, size_t outlen); +__owur int SSL_SESSION_set1_master_key(SSL_SESSION *sess, + const unsigned char *in, size_t len); +uint8_t SSL_SESSION_get_max_fragment_length(const SSL_SESSION *sess); + +#define SSL_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL, l, p, newf, dupf, freef) +__owur int SSL_set_ex_data(SSL *ssl, int idx, void *data); +void *SSL_get_ex_data(const SSL *ssl, int idx); +#define SSL_SESSION_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL_SESSION, l, p, newf, dupf, freef) +__owur int SSL_SESSION_set_ex_data(SSL_SESSION *ss, int idx, void *data); +void *SSL_SESSION_get_ex_data(const SSL_SESSION *ss, int idx); +#define SSL_CTX_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL_CTX, l, p, newf, dupf, freef) +__owur int SSL_CTX_set_ex_data(SSL_CTX *ssl, int idx, void *data); +void *SSL_CTX_get_ex_data(const SSL_CTX *ssl, int idx); + +__owur int SSL_get_ex_data_X509_STORE_CTX_idx(void); + +# define SSL_CTX_sess_set_cache_size(ctx,t) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SESS_CACHE_SIZE,t,NULL) +# define SSL_CTX_sess_get_cache_size(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_SESS_CACHE_SIZE,0,NULL) +# define SSL_CTX_set_session_cache_mode(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SESS_CACHE_MODE,m,NULL) +# define SSL_CTX_get_session_cache_mode(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_SESS_CACHE_MODE,0,NULL) + +# define SSL_CTX_get_default_read_ahead(ctx) SSL_CTX_get_read_ahead(ctx) +# define SSL_CTX_set_default_read_ahead(ctx,m) SSL_CTX_set_read_ahead(ctx,m) +# define SSL_CTX_get_read_ahead(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_READ_AHEAD,0,NULL) +# define SSL_CTX_set_read_ahead(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_READ_AHEAD,m,NULL) +# define SSL_CTX_get_max_cert_list(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_MAX_CERT_LIST,0,NULL) +# define SSL_CTX_set_max_cert_list(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_CERT_LIST,m,NULL) +# define SSL_get_max_cert_list(ssl) \ + SSL_ctrl(ssl,SSL_CTRL_GET_MAX_CERT_LIST,0,NULL) +# define SSL_set_max_cert_list(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_CERT_LIST,m,NULL) + +# define SSL_CTX_set_max_send_fragment(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_SEND_FRAGMENT,m,NULL) +# define SSL_set_max_send_fragment(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_SEND_FRAGMENT,m,NULL) +# define SSL_CTX_set_split_send_fragment(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SPLIT_SEND_FRAGMENT,m,NULL) +# define SSL_set_split_send_fragment(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_SPLIT_SEND_FRAGMENT,m,NULL) +# define SSL_CTX_set_max_pipelines(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_PIPELINES,m,NULL) +# define SSL_set_max_pipelines(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_PIPELINES,m,NULL) +# define SSL_set_retry_verify(ssl) \ + (SSL_ctrl(ssl,SSL_CTRL_SET_RETRY_VERIFY,0,NULL) > 0) + +void SSL_CTX_set_default_read_buffer_len(SSL_CTX *ctx, size_t len); +void SSL_set_default_read_buffer_len(SSL *s, size_t len); + +# ifndef OPENSSL_NO_DH +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* NB: 
the |keylength| is only applicable when is_export is true */ +OSSL_DEPRECATEDIN_3_0 +void SSL_CTX_set_tmp_dh_callback(SSL_CTX *ctx, + DH *(*dh) (SSL *ssl, int is_export, + int keylength)); +OSSL_DEPRECATEDIN_3_0 +void SSL_set_tmp_dh_callback(SSL *ssl, + DH *(*dh) (SSL *ssl, int is_export, + int keylength)); +# endif +# endif + +__owur const COMP_METHOD *SSL_get_current_compression(const SSL *s); +__owur const COMP_METHOD *SSL_get_current_expansion(const SSL *s); +__owur const char *SSL_COMP_get_name(const COMP_METHOD *comp); +__owur const char *SSL_COMP_get0_name(const SSL_COMP *comp); +__owur int SSL_COMP_get_id(const SSL_COMP *comp); +STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void); +__owur STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP) + *meths); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_COMP_free_compression_methods() while(0) continue +# endif +__owur int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm); + +const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr); +int SSL_CIPHER_get_cipher_nid(const SSL_CIPHER *c); +int SSL_CIPHER_get_digest_nid(const SSL_CIPHER *c); +int SSL_bytes_to_cipher_list(SSL *s, const unsigned char *bytes, size_t len, + int isv2format, STACK_OF(SSL_CIPHER) **sk, + STACK_OF(SSL_CIPHER) **scsvs); + +/* TLS extensions functions */ +__owur int SSL_set_session_ticket_ext(SSL *s, void *ext_data, int ext_len); + +__owur int SSL_set_session_ticket_ext_cb(SSL *s, + tls_session_ticket_ext_cb_fn cb, + void *arg); + +/* Pre-shared secret session resumption functions */ +__owur int SSL_set_session_secret_cb(SSL *s, + tls_session_secret_cb_fn session_secret_cb, + void *arg); + +void SSL_CTX_set_not_resumable_session_callback(SSL_CTX *ctx, + int (*cb) (SSL *ssl, + int + is_forward_secure)); + +void SSL_set_not_resumable_session_callback(SSL *ssl, + int (*cb) (SSL *ssl, + int is_forward_secure)); + +void SSL_CTX_set_record_padding_callback(SSL_CTX *ctx, + size_t (*cb) (SSL *ssl, int type, + size_t len, void *arg)); +void SSL_CTX_set_record_padding_callback_arg(SSL_CTX *ctx, void *arg); +void *SSL_CTX_get_record_padding_callback_arg(const SSL_CTX *ctx); +int SSL_CTX_set_block_padding(SSL_CTX *ctx, size_t block_size); + +int SSL_set_record_padding_callback(SSL *ssl, + size_t (*cb) (SSL *ssl, int type, + size_t len, void *arg)); +void SSL_set_record_padding_callback_arg(SSL *ssl, void *arg); +void *SSL_get_record_padding_callback_arg(const SSL *ssl); +int SSL_set_block_padding(SSL *ssl, size_t block_size); + +int SSL_set_num_tickets(SSL *s, size_t num_tickets); +size_t SSL_get_num_tickets(const SSL *s); +int SSL_CTX_set_num_tickets(SSL_CTX *ctx, size_t num_tickets); +size_t SSL_CTX_get_num_tickets(const SSL_CTX *ctx); + +/* QUIC support */ +int SSL_handle_events(SSL *s); +__owur int SSL_get_event_timeout(SSL *s, struct timeval *tv, int *is_infinite); +__owur int SSL_get_rpoll_descriptor(SSL *s, BIO_POLL_DESCRIPTOR *desc); +__owur int SSL_get_wpoll_descriptor(SSL *s, BIO_POLL_DESCRIPTOR *desc); +__owur int SSL_net_read_desired(SSL *s); +__owur int SSL_net_write_desired(SSL *s); +__owur int SSL_set_blocking_mode(SSL *s, int blocking); +__owur int SSL_get_blocking_mode(SSL *s); +__owur int SSL_set1_initial_peer_addr(SSL *s, const BIO_ADDR *peer_addr); +__owur SSL *SSL_get0_connection(SSL *s); +__owur int SSL_is_connection(SSL *s); + +#define SSL_STREAM_TYPE_NONE 0 +#define SSL_STREAM_TYPE_READ (1U << 0) +#define SSL_STREAM_TYPE_WRITE (1U << 1) +#define SSL_STREAM_TYPE_BIDI (SSL_STREAM_TYPE_READ | 
SSL_STREAM_TYPE_WRITE) +__owur int SSL_get_stream_type(SSL *s); + +__owur uint64_t SSL_get_stream_id(SSL *s); +__owur int SSL_is_stream_local(SSL *s); + +#define SSL_DEFAULT_STREAM_MODE_NONE 0 +#define SSL_DEFAULT_STREAM_MODE_AUTO_BIDI 1 +#define SSL_DEFAULT_STREAM_MODE_AUTO_UNI 2 +__owur int SSL_set_default_stream_mode(SSL *s, uint32_t mode); + +#define SSL_STREAM_FLAG_UNI (1U << 0) +#define SSL_STREAM_FLAG_NO_BLOCK (1U << 1) +#define SSL_STREAM_FLAG_ADVANCE (1U << 2) +__owur SSL *SSL_new_stream(SSL *s, uint64_t flags); + +#define SSL_INCOMING_STREAM_POLICY_AUTO 0 +#define SSL_INCOMING_STREAM_POLICY_ACCEPT 1 +#define SSL_INCOMING_STREAM_POLICY_REJECT 2 +__owur int SSL_set_incoming_stream_policy(SSL *s, int policy, uint64_t aec); + +#define SSL_ACCEPT_STREAM_NO_BLOCK (1U << 0) +__owur SSL *SSL_accept_stream(SSL *s, uint64_t flags); +__owur size_t SSL_get_accept_stream_queue_len(SSL *s); + +# ifndef OPENSSL_NO_QUIC +__owur int SSL_inject_net_dgram(SSL *s, const unsigned char *buf, + size_t buf_len, + const BIO_ADDR *peer, + const BIO_ADDR *local); +# endif + +typedef struct ssl_shutdown_ex_args_st { + uint64_t quic_error_code; + const char *quic_reason; +} SSL_SHUTDOWN_EX_ARGS; + +#define SSL_SHUTDOWN_FLAG_RAPID (1U << 0) +#define SSL_SHUTDOWN_FLAG_NO_STREAM_FLUSH (1U << 1) +#define SSL_SHUTDOWN_FLAG_NO_BLOCK (1U << 2) +#define SSL_SHUTDOWN_FLAG_WAIT_PEER (1U << 3) + +__owur int SSL_shutdown_ex(SSL *ssl, uint64_t flags, + const SSL_SHUTDOWN_EX_ARGS *args, + size_t args_len); + +__owur int SSL_stream_conclude(SSL *ssl, uint64_t flags); + +typedef struct ssl_stream_reset_args_st { + uint64_t quic_error_code; +} SSL_STREAM_RESET_ARGS; + +__owur int SSL_stream_reset(SSL *ssl, + const SSL_STREAM_RESET_ARGS *args, + size_t args_len); + +#define SSL_STREAM_STATE_NONE 0 +#define SSL_STREAM_STATE_OK 1 +#define SSL_STREAM_STATE_WRONG_DIR 2 +#define SSL_STREAM_STATE_FINISHED 3 +#define SSL_STREAM_STATE_RESET_LOCAL 4 +#define SSL_STREAM_STATE_RESET_REMOTE 5 +#define SSL_STREAM_STATE_CONN_CLOSED 6 +__owur int SSL_get_stream_read_state(SSL *ssl); +__owur int SSL_get_stream_write_state(SSL *ssl); + +__owur int SSL_get_stream_read_error_code(SSL *ssl, uint64_t *app_error_code); +__owur int SSL_get_stream_write_error_code(SSL *ssl, uint64_t *app_error_code); + +#define SSL_CONN_CLOSE_FLAG_LOCAL (1U << 0) +#define SSL_CONN_CLOSE_FLAG_TRANSPORT (1U << 1) + +typedef struct ssl_conn_close_info_st { + uint64_t error_code, frame_type; + const char *reason; + size_t reason_len; + uint32_t flags; +} SSL_CONN_CLOSE_INFO; + +__owur int SSL_get_conn_close_info(SSL *ssl, + SSL_CONN_CLOSE_INFO *info, + size_t info_len); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_cache_hit(s) SSL_session_reused(s) +# endif + +__owur int SSL_session_reused(const SSL *s); +__owur int SSL_is_server(const SSL *s); + +__owur __owur SSL_CONF_CTX *SSL_CONF_CTX_new(void); +int SSL_CONF_CTX_finish(SSL_CONF_CTX *cctx); +void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx); +unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags); +__owur unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, + unsigned int flags); +__owur int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *pre); + +void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl); +void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx); + +__owur int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value); +__owur int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv); +__owur int SSL_CONF_cmd_value_type(SSL_CONF_CTX 
*cctx, const char *cmd); + +void SSL_add_ssl_module(void); +int SSL_config(SSL *s, const char *name); +int SSL_CTX_config(SSL_CTX *ctx, const char *name); + +# ifndef OPENSSL_NO_SSL_TRACE +void SSL_trace(int write_p, int version, int content_type, + const void *buf, size_t len, SSL *ssl, void *arg); +# endif + +# ifndef OPENSSL_NO_SOCK +int DTLSv1_listen(SSL *s, BIO_ADDR *client); +# endif + +# ifndef OPENSSL_NO_CT + +/* + * A callback for verifying that the received SCTs are sufficient. + * Expected to return 1 if they are sufficient, otherwise 0. + * May return a negative integer if an error occurs. + * A connection should be aborted if the SCTs are deemed insufficient. + */ +typedef int (*ssl_ct_validation_cb)(const CT_POLICY_EVAL_CTX *ctx, + const STACK_OF(SCT) *scts, void *arg); + +/* + * Sets a |callback| that is invoked upon receipt of ServerHelloDone to validate + * the received SCTs. + * If the callback returns a non-positive result, the connection is terminated. + * Call this function before beginning a handshake. + * If a NULL |callback| is provided, SCT validation is disabled. + * |arg| is arbitrary userdata that will be passed to the callback whenever it + * is invoked. Ownership of |arg| remains with the caller. + * + * NOTE: A side-effect of setting a CT callback is that an OCSP stapled response + * will be requested. + */ +int SSL_set_ct_validation_callback(SSL *s, ssl_ct_validation_cb callback, + void *arg); +int SSL_CTX_set_ct_validation_callback(SSL_CTX *ctx, + ssl_ct_validation_cb callback, + void *arg); +#define SSL_disable_ct(s) \ + ((void) SSL_set_validation_callback((s), NULL, NULL)) +#define SSL_CTX_disable_ct(ctx) \ + ((void) SSL_CTX_set_validation_callback((ctx), NULL, NULL)) + +/* + * The validation type enumerates the available behaviours of the built-in SSL + * CT validation callback selected via SSL_enable_ct() and SSL_CTX_enable_ct(). + * The underlying callback is a static function in libssl. + */ +enum { + SSL_CT_VALIDATION_PERMISSIVE = 0, + SSL_CT_VALIDATION_STRICT +}; + +/* + * Enable CT by setting up a callback that implements one of the built-in + * validation variants. The SSL_CT_VALIDATION_PERMISSIVE variant always + * continues the handshake, the application can make appropriate decisions at + * handshake completion. The SSL_CT_VALIDATION_STRICT variant requires at + * least one valid SCT, or else handshake termination will be requested. The + * handshake may continue anyway if SSL_VERIFY_NONE is in effect. + */ +int SSL_enable_ct(SSL *s, int validation_mode); +int SSL_CTX_enable_ct(SSL_CTX *ctx, int validation_mode); + +/* + * Report whether a non-NULL callback is enabled. + */ +int SSL_ct_is_enabled(const SSL *s); +int SSL_CTX_ct_is_enabled(const SSL_CTX *ctx); + +/* Gets the SCTs received from a connection */ +const STACK_OF(SCT) *SSL_get0_peer_scts(SSL *s); + +/* + * Loads the CT log list from the default location. + * If a CTLOG_STORE has previously been set using SSL_CTX_set_ctlog_store, + * the log information loaded from this file will be appended to the + * CTLOG_STORE. + * Returns 1 on success, 0 otherwise. + */ +int SSL_CTX_set_default_ctlog_list_file(SSL_CTX *ctx); + +/* + * Loads the CT log list from the specified file path. + * If a CTLOG_STORE has previously been set using SSL_CTX_set_ctlog_store, + * the log information loaded from this file will be appended to the + * CTLOG_STORE. + * Returns 1 on success, 0 otherwise. 
+ */ +int SSL_CTX_set_ctlog_list_file(SSL_CTX *ctx, const char *path); + +/* + * Sets the CT log list used by all SSL connections created from this SSL_CTX. + * Ownership of the CTLOG_STORE is transferred to the SSL_CTX. + */ +void SSL_CTX_set0_ctlog_store(SSL_CTX *ctx, CTLOG_STORE *logs); + +/* + * Gets the CT log list used by all SSL connections created from this SSL_CTX. + * This will be NULL unless one of the following functions has been called: + * - SSL_CTX_set_default_ctlog_list_file + * - SSL_CTX_set_ctlog_list_file + * - SSL_CTX_set_ctlog_store + */ +const CTLOG_STORE *SSL_CTX_get0_ctlog_store(const SSL_CTX *ctx); + +# endif /* OPENSSL_NO_CT */ + +/* What the "other" parameter contains in security callback */ +/* Mask for type */ +# define SSL_SECOP_OTHER_TYPE 0xffff0000 +# define SSL_SECOP_OTHER_NONE 0 +# define SSL_SECOP_OTHER_CIPHER (1 << 16) +# define SSL_SECOP_OTHER_CURVE (2 << 16) +# define SSL_SECOP_OTHER_DH (3 << 16) +# define SSL_SECOP_OTHER_PKEY (4 << 16) +# define SSL_SECOP_OTHER_SIGALG (5 << 16) +# define SSL_SECOP_OTHER_CERT (6 << 16) + +/* Indicated operation refers to peer key or certificate */ +# define SSL_SECOP_PEER 0x1000 + +/* Values for "op" parameter in security callback */ + +/* Called to filter ciphers */ +/* Ciphers client supports */ +# define SSL_SECOP_CIPHER_SUPPORTED (1 | SSL_SECOP_OTHER_CIPHER) +/* Cipher shared by client/server */ +# define SSL_SECOP_CIPHER_SHARED (2 | SSL_SECOP_OTHER_CIPHER) +/* Sanity check of cipher server selects */ +# define SSL_SECOP_CIPHER_CHECK (3 | SSL_SECOP_OTHER_CIPHER) +/* Curves supported by client */ +# define SSL_SECOP_CURVE_SUPPORTED (4 | SSL_SECOP_OTHER_CURVE) +/* Curves shared by client/server */ +# define SSL_SECOP_CURVE_SHARED (5 | SSL_SECOP_OTHER_CURVE) +/* Sanity check of curve server selects */ +# define SSL_SECOP_CURVE_CHECK (6 | SSL_SECOP_OTHER_CURVE) +/* Temporary DH key */ +# define SSL_SECOP_TMP_DH (7 | SSL_SECOP_OTHER_PKEY) +/* SSL/TLS version */ +# define SSL_SECOP_VERSION (9 | SSL_SECOP_OTHER_NONE) +/* Session tickets */ +# define SSL_SECOP_TICKET (10 | SSL_SECOP_OTHER_NONE) +/* Supported signature algorithms sent to peer */ +# define SSL_SECOP_SIGALG_SUPPORTED (11 | SSL_SECOP_OTHER_SIGALG) +/* Shared signature algorithm */ +# define SSL_SECOP_SIGALG_SHARED (12 | SSL_SECOP_OTHER_SIGALG) +/* Sanity check signature algorithm allowed */ +# define SSL_SECOP_SIGALG_CHECK (13 | SSL_SECOP_OTHER_SIGALG) +/* Used to get mask of supported public key signature algorithms */ +# define SSL_SECOP_SIGALG_MASK (14 | SSL_SECOP_OTHER_SIGALG) +/* Use to see if compression is allowed */ +# define SSL_SECOP_COMPRESSION (15 | SSL_SECOP_OTHER_NONE) +/* EE key in certificate */ +# define SSL_SECOP_EE_KEY (16 | SSL_SECOP_OTHER_CERT) +/* CA key in certificate */ +# define SSL_SECOP_CA_KEY (17 | SSL_SECOP_OTHER_CERT) +/* CA digest algorithm in certificate */ +# define SSL_SECOP_CA_MD (18 | SSL_SECOP_OTHER_CERT) +/* Peer EE key in certificate */ +# define SSL_SECOP_PEER_EE_KEY (SSL_SECOP_EE_KEY | SSL_SECOP_PEER) +/* Peer CA key in certificate */ +# define SSL_SECOP_PEER_CA_KEY (SSL_SECOP_CA_KEY | SSL_SECOP_PEER) +/* Peer CA digest algorithm in certificate */ +# define SSL_SECOP_PEER_CA_MD (SSL_SECOP_CA_MD | SSL_SECOP_PEER) + +void SSL_set_security_level(SSL *s, int level); +__owur int SSL_get_security_level(const SSL *s); +void SSL_set_security_callback(SSL *s, + int (*cb) (const SSL *s, const SSL_CTX *ctx, + int op, int bits, int nid, + void *other, void *ex)); +int (*SSL_get_security_callback(const SSL *s)) (const SSL *s, + 
const SSL_CTX *ctx, int op, + int bits, int nid, void *other, + void *ex); +void SSL_set0_security_ex_data(SSL *s, void *ex); +__owur void *SSL_get0_security_ex_data(const SSL *s); + +void SSL_CTX_set_security_level(SSL_CTX *ctx, int level); +__owur int SSL_CTX_get_security_level(const SSL_CTX *ctx); +void SSL_CTX_set_security_callback(SSL_CTX *ctx, + int (*cb) (const SSL *s, const SSL_CTX *ctx, + int op, int bits, int nid, + void *other, void *ex)); +int (*SSL_CTX_get_security_callback(const SSL_CTX *ctx)) (const SSL *s, + const SSL_CTX *ctx, + int op, int bits, + int nid, + void *other, + void *ex); +void SSL_CTX_set0_security_ex_data(SSL_CTX *ctx, void *ex); +__owur void *SSL_CTX_get0_security_ex_data(const SSL_CTX *ctx); + +/* OPENSSL_INIT flag 0x010000 reserved for internal use */ +# define OPENSSL_INIT_NO_LOAD_SSL_STRINGS 0x00100000L +# define OPENSSL_INIT_LOAD_SSL_STRINGS 0x00200000L + +# define OPENSSL_INIT_SSL_DEFAULT \ + (OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS) + +int OPENSSL_init_ssl(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings); + +# ifndef OPENSSL_NO_UNIT_TEST +__owur const struct openssl_ssl_test_functions *SSL_test_functions(void); +# endif + +__owur int SSL_free_buffers(SSL *ssl); +__owur int SSL_alloc_buffers(SSL *ssl); + +/* Status codes passed to the decrypt session ticket callback. Some of these + * are for internal use only and are never passed to the callback. */ +typedef int SSL_TICKET_STATUS; + +/* Support for ticket appdata */ +/* fatal error, malloc failure */ +# define SSL_TICKET_FATAL_ERR_MALLOC 0 +/* fatal error, either from parsing or decrypting the ticket */ +# define SSL_TICKET_FATAL_ERR_OTHER 1 +/* No ticket present */ +# define SSL_TICKET_NONE 2 +/* Empty ticket present */ +# define SSL_TICKET_EMPTY 3 +/* the ticket couldn't be decrypted */ +# define SSL_TICKET_NO_DECRYPT 4 +/* a ticket was successfully decrypted */ +# define SSL_TICKET_SUCCESS 5 +/* same as above but the ticket needs to be renewed */ +# define SSL_TICKET_SUCCESS_RENEW 6 + +/* Return codes for the decrypt session ticket callback */ +typedef int SSL_TICKET_RETURN; + +/* An error occurred */ +#define SSL_TICKET_RETURN_ABORT 0 +/* Do not use the ticket, do not send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_IGNORE 1 +/* Do not use the ticket, send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_IGNORE_RENEW 2 +/* Use the ticket, do not send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_USE 3 +/* Use the ticket, send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_USE_RENEW 4 + +typedef int (*SSL_CTX_generate_session_ticket_fn)(SSL *s, void *arg); +typedef SSL_TICKET_RETURN (*SSL_CTX_decrypt_session_ticket_fn)(SSL *s, SSL_SESSION *ss, + const unsigned char *keyname, + size_t keyname_length, + SSL_TICKET_STATUS status, + void *arg); +int SSL_CTX_set_session_ticket_cb(SSL_CTX *ctx, + SSL_CTX_generate_session_ticket_fn gen_cb, + SSL_CTX_decrypt_session_ticket_fn dec_cb, + void *arg); +int SSL_SESSION_set1_ticket_appdata(SSL_SESSION *ss, const void *data, size_t len); +int SSL_SESSION_get0_ticket_appdata(SSL_SESSION *ss, void **data, size_t *len); + +typedef unsigned int (*DTLS_timer_cb)(SSL *s, unsigned int timer_us); + +void DTLS_set_timer_cb(SSL *s, DTLS_timer_cb cb); + + +typedef int (*SSL_allow_early_data_cb_fn)(SSL *s, void *arg); +void SSL_CTX_set_allow_early_data_cb(SSL_CTX *ctx, + SSL_allow_early_data_cb_fn cb, + void *arg); +void SSL_set_allow_early_data_cb(SSL *s, + SSL_allow_early_data_cb_fn 
cb, + void *arg); + +/* store the default cipher strings inside the library */ +const char *OSSL_default_cipher_list(void); +const char *OSSL_default_ciphersuites(void); + +/* RFC8879 Certificate compression APIs */ + +int SSL_CTX_compress_certs(SSL_CTX *ctx, int alg); +int SSL_compress_certs(SSL *ssl, int alg); + +int SSL_CTX_set1_cert_comp_preference(SSL_CTX *ctx, int *algs, size_t len); +int SSL_set1_cert_comp_preference(SSL *ssl, int *algs, size_t len); + +int SSL_CTX_set1_compressed_cert(SSL_CTX *ctx, int algorithm, unsigned char *comp_data, + size_t comp_length, size_t orig_length); +int SSL_set1_compressed_cert(SSL *ssl, int algorithm, unsigned char *comp_data, + size_t comp_length, size_t orig_length); +size_t SSL_CTX_get1_compressed_cert(SSL_CTX *ctx, int alg, unsigned char **data, size_t *orig_len); +size_t SSL_get1_compressed_cert(SSL *ssl, int alg, unsigned char **data, size_t *orig_len); + +__owur int SSL_add_expected_rpk(SSL *s, EVP_PKEY *rpk); +__owur EVP_PKEY *SSL_get0_peer_rpk(const SSL *s); +__owur EVP_PKEY *SSL_SESSION_get0_peer_rpk(SSL_SESSION *s); +__owur int SSL_get_negotiated_client_cert_type(const SSL *s); +__owur int SSL_get_negotiated_server_cert_type(const SSL *s); + +__owur int SSL_set1_client_cert_type(SSL *s, const unsigned char *val, size_t len); +__owur int SSL_set1_server_cert_type(SSL *s, const unsigned char *val, size_t len); +__owur int SSL_CTX_set1_client_cert_type(SSL_CTX *ctx, const unsigned char *val, size_t len); +__owur int SSL_CTX_set1_server_cert_type(SSL_CTX *ctx, const unsigned char *val, size_t len); +__owur int SSL_get0_client_cert_type(const SSL *s, unsigned char **t, size_t *len); +__owur int SSL_get0_server_cert_type(const SSL *s, unsigned char **t, size_t *len); +__owur int SSL_CTX_get0_client_cert_type(const SSL_CTX *ctx, unsigned char **t, size_t *len); +__owur int SSL_CTX_get0_server_cert_type(const SSL_CTX *s, unsigned char **t, size_t *len); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ui.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ui.h new file mode 100644 index 00000000000..e64ec3b37fb --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ui.h @@ -0,0 +1,407 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ui.h.in + * + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_UI_H +# define OPENSSL_UI_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_UI_H +# endif + +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# endif +# include +# include +# include +# include + +/* For compatibility reasons, the macro OPENSSL_NO_UI is currently retained */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifdef OPENSSL_NO_UI_CONSOLE +# define OPENSSL_NO_UI +# endif +# endif + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * All the following functions return -1 or NULL on error and in some cases + * (UI_process()) -2 if interrupted or in some other way cancelled. When + * everything is fine, they return 0, a positive value or a non-NULL pointer, + * all depending on their purpose. + */ + +/* Creators and destructor. 
*/ +UI *UI_new(void); +UI *UI_new_method(const UI_METHOD *method); +void UI_free(UI *ui); + +/*- + The following functions are used to add strings to be printed and prompt + strings to prompt for data. The names are UI_{add,dup}__string + and UI_{add,dup}_input_boolean. + + UI_{add,dup}__string have the following meanings: + add add a text or prompt string. The pointers given to these + functions are used verbatim, no copying is done. + dup make a copy of the text or prompt string, then add the copy + to the collection of strings in the user interface. + + The function is a name for the functionality that the given + string shall be used for. It can be one of: + input use the string as data prompt. + verify use the string as verification prompt. This + is used to verify a previous input. + info use the string for informational output. + error use the string for error output. + Honestly, there's currently no difference between info and error for the + moment. + + UI_{add,dup}_input_boolean have the same semantics for "add" and "dup", + and are typically used when one wants to prompt for a yes/no response. + + All of the functions in this group take a UI and a prompt string. + The string input and verify addition functions also take a flag argument, + a buffer for the result to end up with, a minimum input size and a maximum + input size (the result buffer MUST be large enough to be able to contain + the maximum number of characters). Additionally, the verify addition + functions takes another buffer to compare the result against. + The boolean input functions take an action description string (which should + be safe to ignore if the expected user action is obvious, for example with + a dialog box with an OK button and a Cancel button), a string of acceptable + characters to mean OK and to mean Cancel. The two last strings are checked + to make sure they don't have common characters. Additionally, the same + flag argument as for the string input is taken, as well as a result buffer. + The result buffer is required to be at least one byte long. Depending on + the answer, the first character from the OK or the Cancel character strings + will be stored in the first byte of the result buffer. No NUL will be + added, so the result is *not* a string. + + On success, the all return an index of the added information. That index + is useful when retrieving results with UI_get0_result(). */ +int UI_add_input_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize); +int UI_dup_input_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize); +int UI_add_verify_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize, + const char *test_buf); +int UI_dup_verify_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize, + const char *test_buf); +int UI_add_input_boolean(UI *ui, const char *prompt, const char *action_desc, + const char *ok_chars, const char *cancel_chars, + int flags, char *result_buf); +int UI_dup_input_boolean(UI *ui, const char *prompt, const char *action_desc, + const char *ok_chars, const char *cancel_chars, + int flags, char *result_buf); +int UI_add_info_string(UI *ui, const char *text); +int UI_dup_info_string(UI *ui, const char *text); +int UI_add_error_string(UI *ui, const char *text); +int UI_dup_error_string(UI *ui, const char *text); + +/* These are the possible flags. They can be or'ed together. 
*/ +/* Use to have echoing of input */ +# define UI_INPUT_FLAG_ECHO 0x01 +/* + * Use a default password. Where that password is found is completely up to + * the application, it might for example be in the user data set with + * UI_add_user_data(). It is not recommended to have more than one input in + * each UI being marked with this flag, or the application might get + * confused. + */ +# define UI_INPUT_FLAG_DEFAULT_PWD 0x02 + +/*- + * The user of these routines may want to define flags of their own. The core + * UI won't look at those, but will pass them on to the method routines. They + * must use higher bits so they don't get confused with the UI bits above. + * UI_INPUT_FLAG_USER_BASE tells which is the lowest bit to use. A good + * example of use is this: + * + * #define MY_UI_FLAG1 (0x01 << UI_INPUT_FLAG_USER_BASE) + * +*/ +# define UI_INPUT_FLAG_USER_BASE 16 + +/*- + * The following function helps construct a prompt. + * phrase_desc is a textual short description of the phrase to enter, + * for example "pass phrase", and + * object_name is the name of the object + * (which might be a card name or a file name) or NULL. + * The returned string shall always be allocated on the heap with + * OPENSSL_malloc(), and need to be free'd with OPENSSL_free(). + * + * If the ui_method doesn't contain a pointer to a user-defined prompt + * constructor, a default string is built, looking like this: + * + * "Enter {phrase_desc} for {object_name}:" + * + * So, if phrase_desc has the value "pass phrase" and object_name has + * the value "foo.key", the resulting string is: + * + * "Enter pass phrase for foo.key:" +*/ +char *UI_construct_prompt(UI *ui_method, + const char *phrase_desc, const char *object_name); + +/* + * The following function is used to store a pointer to user-specific data. + * Any previous such pointer will be returned and replaced. + * + * For callback purposes, this function makes a lot more sense than using + * ex_data, since the latter requires that different parts of OpenSSL or + * applications share the same ex_data index. + * + * Note that the UI_OpenSSL() method completely ignores the user data. Other + * methods may not, however. + */ +void *UI_add_user_data(UI *ui, void *user_data); +/* + * Alternatively, this function is used to duplicate the user data. + * This uses the duplicator method function. The destroy function will + * be used to free the user data in this case. + */ +int UI_dup_user_data(UI *ui, void *user_data); +/* We need a user data retrieving function as well. */ +void *UI_get0_user_data(UI *ui); + +/* Return the result associated with a prompt given with the index i. */ +const char *UI_get0_result(UI *ui, int i); +int UI_get_result_length(UI *ui, int i); + +/* When all strings have been added, process the whole thing. */ +int UI_process(UI *ui); + +/* + * Give a user interface parameterised control commands. This can be used to + * send down an integer, a data pointer or a function pointer, as well as be + * used to get information from a UI. + */ +int UI_ctrl(UI *ui, int cmd, long i, void *p, void (*f) (void)); + +/* The commands */ +/* + * Use UI_CONTROL_PRINT_ERRORS with the value 1 to have UI_process print the + * OpenSSL error stack before printing any info or added error messages and + * before any prompting. + */ +# define UI_CTRL_PRINT_ERRORS 1 +/* + * Check if a UI_process() is possible to do again with the same instance of + * a user interface. This makes UI_ctrl() return 1 if it is redoable, and 0 + * if not. 
+ */ +# define UI_CTRL_IS_REDOABLE 2 + +/* Some methods may use extra data */ +# define UI_set_app_data(s,arg) UI_set_ex_data(s,0,arg) +# define UI_get_app_data(s) UI_get_ex_data(s,0) + +# define UI_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_UI, l, p, newf, dupf, freef) +int UI_set_ex_data(UI *r, int idx, void *arg); +void *UI_get_ex_data(const UI *r, int idx); + +/* Use specific methods instead of the built-in one */ +void UI_set_default_method(const UI_METHOD *meth); +const UI_METHOD *UI_get_default_method(void); +const UI_METHOD *UI_get_method(UI *ui); +const UI_METHOD *UI_set_method(UI *ui, const UI_METHOD *meth); + +# ifndef OPENSSL_NO_UI_CONSOLE + +/* The method with all the built-in thingies */ +UI_METHOD *UI_OpenSSL(void); + +# endif + +/* + * NULL method. Literally does nothing, but may serve as a placeholder + * to avoid internal default. + */ +const UI_METHOD *UI_null(void); + +/* ---------- For method writers ---------- */ +/*- + A method contains a number of functions that implement the low level + of the User Interface. The functions are: + + an opener This function starts a session, maybe by opening + a channel to a tty, or by opening a window. + a writer This function is called to write a given string, + maybe to the tty, maybe as a field label in a + window. + a flusher This function is called to flush everything that + has been output so far. It can be used to actually + display a dialog box after it has been built. + a reader This function is called to read a given prompt, + maybe from the tty, maybe from a field in a + window. Note that it's called with all string + structures, not only the prompt ones, so it must + check such things itself. + a closer This function closes the session, maybe by closing + the channel to the tty, or closing the window. + + All these functions are expected to return: + + 0 on error. + 1 on success. + -1 on out-of-band events, for example if some prompting has + been canceled (by pressing Ctrl-C, for example). This is + only checked when returned by the flusher or the reader. + + The way this is used, the opener is first called, then the writer for all + strings, then the flusher, then the reader for all strings and finally the + closer. Note that if you want to prompt from a terminal or other command + line interface, the best is to have the reader also write the prompts + instead of having the writer do it. If you want to prompt from a dialog + box, the writer can be used to build up the contents of the box, and the + flusher to actually display the box and run the event loop until all data + has been given, after which the reader only grabs the given data and puts + them back into the UI strings. + + All method functions take a UI as argument. Additionally, the writer and + the reader take a UI_STRING. +*/ + +/* + * The UI_STRING type is the data structure that contains all the needed info + * about a string or a prompt, including test data for a verification prompt. 
+ */ +typedef struct ui_string_st UI_STRING; + +SKM_DEFINE_STACK_OF_INTERNAL(UI_STRING, UI_STRING, UI_STRING) +#define sk_UI_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_value(sk, idx) ((UI_STRING *)OPENSSL_sk_value(ossl_check_const_UI_STRING_sk_type(sk), (idx))) +#define sk_UI_STRING_new(cmp) ((STACK_OF(UI_STRING) *)OPENSSL_sk_new(ossl_check_UI_STRING_compfunc_type(cmp))) +#define sk_UI_STRING_new_null() ((STACK_OF(UI_STRING) *)OPENSSL_sk_new_null()) +#define sk_UI_STRING_new_reserve(cmp, n) ((STACK_OF(UI_STRING) *)OPENSSL_sk_new_reserve(ossl_check_UI_STRING_compfunc_type(cmp), (n))) +#define sk_UI_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_UI_STRING_sk_type(sk), (n)) +#define sk_UI_STRING_free(sk) OPENSSL_sk_free(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_delete(sk, i) ((UI_STRING *)OPENSSL_sk_delete(ossl_check_UI_STRING_sk_type(sk), (i))) +#define sk_UI_STRING_delete_ptr(sk, ptr) ((UI_STRING *)OPENSSL_sk_delete_ptr(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr))) +#define sk_UI_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_pop(sk) ((UI_STRING *)OPENSSL_sk_pop(ossl_check_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_shift(sk) ((UI_STRING *)OPENSSL_sk_shift(ossl_check_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_UI_STRING_sk_type(sk),ossl_check_UI_STRING_freefunc_type(freefunc)) +#define sk_UI_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr), (idx)) +#define sk_UI_STRING_set(sk, idx, ptr) ((UI_STRING *)OPENSSL_sk_set(ossl_check_UI_STRING_sk_type(sk), (idx), ossl_check_UI_STRING_type(ptr))) +#define sk_UI_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr), pnum) +#define sk_UI_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_dup(sk) ((STACK_OF(UI_STRING) *)OPENSSL_sk_dup(ossl_check_const_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(UI_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_UI_STRING_sk_type(sk), ossl_check_UI_STRING_copyfunc_type(copyfunc), ossl_check_UI_STRING_freefunc_type(freefunc))) +#define sk_UI_STRING_set_cmp_func(sk, cmp) ((sk_UI_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_compfunc_type(cmp))) + + +/* + * The different types of strings that are currently supported. This is only + * needed by method authors. 
+ */ +enum UI_string_types { + UIT_NONE = 0, + UIT_PROMPT, /* Prompt for a string */ + UIT_VERIFY, /* Prompt for a string and verify */ + UIT_BOOLEAN, /* Prompt for a yes/no response */ + UIT_INFO, /* Send info to the user */ + UIT_ERROR /* Send an error message to the user */ +}; + +/* Create and manipulate methods */ +UI_METHOD *UI_create_method(const char *name); +void UI_destroy_method(UI_METHOD *ui_method); +int UI_method_set_opener(UI_METHOD *method, int (*opener) (UI *ui)); +int UI_method_set_writer(UI_METHOD *method, + int (*writer) (UI *ui, UI_STRING *uis)); +int UI_method_set_flusher(UI_METHOD *method, int (*flusher) (UI *ui)); +int UI_method_set_reader(UI_METHOD *method, + int (*reader) (UI *ui, UI_STRING *uis)); +int UI_method_set_closer(UI_METHOD *method, int (*closer) (UI *ui)); +int UI_method_set_data_duplicator(UI_METHOD *method, + void *(*duplicator) (UI *ui, void *ui_data), + void (*destructor)(UI *ui, void *ui_data)); +int UI_method_set_prompt_constructor(UI_METHOD *method, + char *(*prompt_constructor) (UI *ui, + const char + *phrase_desc, + const char + *object_name)); +int UI_method_set_ex_data(UI_METHOD *method, int idx, void *data); +int (*UI_method_get_opener(const UI_METHOD *method)) (UI *); +int (*UI_method_get_writer(const UI_METHOD *method)) (UI *, UI_STRING *); +int (*UI_method_get_flusher(const UI_METHOD *method)) (UI *); +int (*UI_method_get_reader(const UI_METHOD *method)) (UI *, UI_STRING *); +int (*UI_method_get_closer(const UI_METHOD *method)) (UI *); +char *(*UI_method_get_prompt_constructor(const UI_METHOD *method)) + (UI *, const char *, const char *); +void *(*UI_method_get_data_duplicator(const UI_METHOD *method)) (UI *, void *); +void (*UI_method_get_data_destructor(const UI_METHOD *method)) (UI *, void *); +const void *UI_method_get_ex_data(const UI_METHOD *method, int idx); + +/* + * The following functions are helpers for method writers to access relevant + * data from a UI_STRING. + */ + +/* Return type of the UI_STRING */ +enum UI_string_types UI_get_string_type(UI_STRING *uis); +/* Return input flags of the UI_STRING */ +int UI_get_input_flags(UI_STRING *uis); +/* Return the actual string to output (the prompt, info or error) */ +const char *UI_get0_output_string(UI_STRING *uis); +/* + * Return the optional action string to output (the boolean prompt + * instruction) + */ +const char *UI_get0_action_string(UI_STRING *uis); +/* Return the result of a prompt */ +const char *UI_get0_result_string(UI_STRING *uis); +int UI_get_result_string_length(UI_STRING *uis); +/* + * Return the string to test the result against. Only useful with verifies. + */ +const char *UI_get0_test_string(UI_STRING *uis); +/* Return the required minimum size of the result */ +int UI_get_result_minsize(UI_STRING *uis); +/* Return the required maximum size of the result */ +int UI_get_result_maxsize(UI_STRING *uis); +/* Set the result of a UI_STRING. 
*/ +int UI_set_result(UI *ui, UI_STRING *uis, const char *result); +int UI_set_result_ex(UI *ui, UI_STRING *uis, const char *result, int len); + +/* A couple of popular utility functions */ +int UI_UTIL_read_pw_string(char *buf, int length, const char *prompt, + int verify); +int UI_UTIL_read_pw(char *buf, char *buff, int size, const char *prompt, + int verify); +UI_METHOD *UI_UTIL_wrap_read_pem_callback(pem_password_cb *cb, int rwflag); + + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509.h new file mode 100644 index 00000000000..ac1326330b8 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509.h @@ -0,0 +1,1286 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509_H +# define OPENSSL_X509_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509_H +# endif + +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# include +# include +# endif + +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Needed stacks for types defined in other headers */ +SKM_DEFINE_STACK_OF_INTERNAL(X509_NAME, X509_NAME, X509_NAME) +#define sk_X509_NAME_num(sk) OPENSSL_sk_num(ossl_check_const_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_value(sk, idx) ((X509_NAME *)OPENSSL_sk_value(ossl_check_const_X509_NAME_sk_type(sk), (idx))) +#define sk_X509_NAME_new(cmp) ((STACK_OF(X509_NAME) *)OPENSSL_sk_new(ossl_check_X509_NAME_compfunc_type(cmp))) +#define sk_X509_NAME_new_null() ((STACK_OF(X509_NAME) *)OPENSSL_sk_new_null()) +#define sk_X509_NAME_new_reserve(cmp, n) ((STACK_OF(X509_NAME) *)OPENSSL_sk_new_reserve(ossl_check_X509_NAME_compfunc_type(cmp), (n))) +#define sk_X509_NAME_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_NAME_sk_type(sk), (n)) +#define sk_X509_NAME_free(sk) OPENSSL_sk_free(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_zero(sk) OPENSSL_sk_zero(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_delete(sk, i) ((X509_NAME *)OPENSSL_sk_delete(ossl_check_X509_NAME_sk_type(sk), (i))) +#define sk_X509_NAME_delete_ptr(sk, ptr) ((X509_NAME *)OPENSSL_sk_delete_ptr(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr))) +#define sk_X509_NAME_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_pop(sk) ((X509_NAME *)OPENSSL_sk_pop(ossl_check_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_shift(sk) ((X509_NAME *)OPENSSL_sk_shift(ossl_check_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_NAME_sk_type(sk),ossl_check_X509_NAME_freefunc_type(freefunc)) +#define sk_X509_NAME_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr), (idx)) +#define sk_X509_NAME_set(sk, idx, ptr) ((X509_NAME *)OPENSSL_sk_set(ossl_check_X509_NAME_sk_type(sk), (idx), ossl_check_X509_NAME_type(ptr))) +#define sk_X509_NAME_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr), pnum) +#define sk_X509_NAME_sort(sk) OPENSSL_sk_sort(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_dup(sk) ((STACK_OF(X509_NAME) *)OPENSSL_sk_dup(ossl_check_const_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_NAME) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_NAME_sk_type(sk), ossl_check_X509_NAME_copyfunc_type(copyfunc), ossl_check_X509_NAME_freefunc_type(freefunc))) +#define sk_X509_NAME_set_cmp_func(sk, cmp) ((sk_X509_NAME_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509, X509, X509) +#define sk_X509_num(sk) OPENSSL_sk_num(ossl_check_const_X509_sk_type(sk)) +#define sk_X509_value(sk, idx) ((X509 *)OPENSSL_sk_value(ossl_check_const_X509_sk_type(sk), (idx))) +#define sk_X509_new(cmp) ((STACK_OF(X509) *)OPENSSL_sk_new(ossl_check_X509_compfunc_type(cmp))) +#define sk_X509_new_null() ((STACK_OF(X509) *)OPENSSL_sk_new_null()) +#define sk_X509_new_reserve(cmp, n) ((STACK_OF(X509) *)OPENSSL_sk_new_reserve(ossl_check_X509_compfunc_type(cmp), (n))) +#define sk_X509_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_sk_type(sk), (n)) +#define sk_X509_free(sk) OPENSSL_sk_free(ossl_check_X509_sk_type(sk)) +#define sk_X509_zero(sk) OPENSSL_sk_zero(ossl_check_X509_sk_type(sk)) +#define sk_X509_delete(sk, i) ((X509 *)OPENSSL_sk_delete(ossl_check_X509_sk_type(sk), (i))) +#define sk_X509_delete_ptr(sk, ptr) ((X509 *)OPENSSL_sk_delete_ptr(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr))) +#define sk_X509_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_pop(sk) ((X509 *)OPENSSL_sk_pop(ossl_check_X509_sk_type(sk))) +#define sk_X509_shift(sk) ((X509 *)OPENSSL_sk_shift(ossl_check_X509_sk_type(sk))) +#define sk_X509_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_sk_type(sk),ossl_check_X509_freefunc_type(freefunc)) +#define sk_X509_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr), (idx)) +#define sk_X509_set(sk, idx, ptr) ((X509 *)OPENSSL_sk_set(ossl_check_X509_sk_type(sk), (idx), ossl_check_X509_type(ptr))) +#define sk_X509_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr), pnum) +#define sk_X509_sort(sk) OPENSSL_sk_sort(ossl_check_X509_sk_type(sk)) +#define sk_X509_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_sk_type(sk)) +#define sk_X509_dup(sk) ((STACK_OF(X509) 
*)OPENSSL_sk_dup(ossl_check_const_X509_sk_type(sk))) +#define sk_X509_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_sk_type(sk), ossl_check_X509_copyfunc_type(copyfunc), ossl_check_X509_freefunc_type(freefunc))) +#define sk_X509_set_cmp_func(sk, cmp) ((sk_X509_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_sk_type(sk), ossl_check_X509_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_REVOKED, X509_REVOKED, X509_REVOKED) +#define sk_X509_REVOKED_num(sk) OPENSSL_sk_num(ossl_check_const_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_value(sk, idx) ((X509_REVOKED *)OPENSSL_sk_value(ossl_check_const_X509_REVOKED_sk_type(sk), (idx))) +#define sk_X509_REVOKED_new(cmp) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new(ossl_check_X509_REVOKED_compfunc_type(cmp))) +#define sk_X509_REVOKED_new_null() ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new_null()) +#define sk_X509_REVOKED_new_reserve(cmp, n) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new_reserve(ossl_check_X509_REVOKED_compfunc_type(cmp), (n))) +#define sk_X509_REVOKED_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_REVOKED_sk_type(sk), (n)) +#define sk_X509_REVOKED_free(sk) OPENSSL_sk_free(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_zero(sk) OPENSSL_sk_zero(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_delete(sk, i) ((X509_REVOKED *)OPENSSL_sk_delete(ossl_check_X509_REVOKED_sk_type(sk), (i))) +#define sk_X509_REVOKED_delete_ptr(sk, ptr) ((X509_REVOKED *)OPENSSL_sk_delete_ptr(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr))) +#define sk_X509_REVOKED_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_pop(sk) ((X509_REVOKED *)OPENSSL_sk_pop(ossl_check_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_shift(sk) ((X509_REVOKED *)OPENSSL_sk_shift(ossl_check_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_REVOKED_sk_type(sk),ossl_check_X509_REVOKED_freefunc_type(freefunc)) +#define sk_X509_REVOKED_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr), (idx)) +#define sk_X509_REVOKED_set(sk, idx, ptr) ((X509_REVOKED *)OPENSSL_sk_set(ossl_check_X509_REVOKED_sk_type(sk), (idx), ossl_check_X509_REVOKED_type(ptr))) +#define sk_X509_REVOKED_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr), pnum) +#define sk_X509_REVOKED_sort(sk) OPENSSL_sk_sort(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_dup(sk) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_dup(ossl_check_const_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_copyfunc_type(copyfunc), ossl_check_X509_REVOKED_freefunc_type(freefunc))) +#define sk_X509_REVOKED_set_cmp_func(sk, cmp) 
((sk_X509_REVOKED_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_CRL, X509_CRL, X509_CRL) +#define sk_X509_CRL_num(sk) OPENSSL_sk_num(ossl_check_const_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_value(sk, idx) ((X509_CRL *)OPENSSL_sk_value(ossl_check_const_X509_CRL_sk_type(sk), (idx))) +#define sk_X509_CRL_new(cmp) ((STACK_OF(X509_CRL) *)OPENSSL_sk_new(ossl_check_X509_CRL_compfunc_type(cmp))) +#define sk_X509_CRL_new_null() ((STACK_OF(X509_CRL) *)OPENSSL_sk_new_null()) +#define sk_X509_CRL_new_reserve(cmp, n) ((STACK_OF(X509_CRL) *)OPENSSL_sk_new_reserve(ossl_check_X509_CRL_compfunc_type(cmp), (n))) +#define sk_X509_CRL_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_CRL_sk_type(sk), (n)) +#define sk_X509_CRL_free(sk) OPENSSL_sk_free(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_zero(sk) OPENSSL_sk_zero(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_delete(sk, i) ((X509_CRL *)OPENSSL_sk_delete(ossl_check_X509_CRL_sk_type(sk), (i))) +#define sk_X509_CRL_delete_ptr(sk, ptr) ((X509_CRL *)OPENSSL_sk_delete_ptr(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr))) +#define sk_X509_CRL_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_pop(sk) ((X509_CRL *)OPENSSL_sk_pop(ossl_check_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_shift(sk) ((X509_CRL *)OPENSSL_sk_shift(ossl_check_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_CRL_sk_type(sk),ossl_check_X509_CRL_freefunc_type(freefunc)) +#define sk_X509_CRL_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr), (idx)) +#define sk_X509_CRL_set(sk, idx, ptr) ((X509_CRL *)OPENSSL_sk_set(ossl_check_X509_CRL_sk_type(sk), (idx), ossl_check_X509_CRL_type(ptr))) +#define sk_X509_CRL_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr), pnum) +#define sk_X509_CRL_sort(sk) OPENSSL_sk_sort(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_dup(sk) ((STACK_OF(X509_CRL) *)OPENSSL_sk_dup(ossl_check_const_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_CRL) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_CRL_sk_type(sk), ossl_check_X509_CRL_copyfunc_type(copyfunc), ossl_check_X509_CRL_freefunc_type(freefunc))) +#define sk_X509_CRL_set_cmp_func(sk, cmp) ((sk_X509_CRL_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_compfunc_type(cmp))) + + +/* Flags for X509_get_signature_info() */ +/* Signature info is valid */ +# define X509_SIG_INFO_VALID 0x1 +/* Signature is suitable for TLS use */ +# define X509_SIG_INFO_TLS 0x2 + +# define X509_FILETYPE_PEM 1 +# define X509_FILETYPE_ASN1 2 +# define X509_FILETYPE_DEFAULT 3 + +# define X509v3_KU_DIGITAL_SIGNATURE 0x0080 +# define X509v3_KU_NON_REPUDIATION 0x0040 +# define X509v3_KU_KEY_ENCIPHERMENT 0x0020 +# define X509v3_KU_DATA_ENCIPHERMENT 0x0010 +# define 
X509v3_KU_KEY_AGREEMENT 0x0008 +# define X509v3_KU_KEY_CERT_SIGN 0x0004 +# define X509v3_KU_CRL_SIGN 0x0002 +# define X509v3_KU_ENCIPHER_ONLY 0x0001 +# define X509v3_KU_DECIPHER_ONLY 0x8000 +# define X509v3_KU_UNDEF 0xffff + +struct X509_algor_st { + ASN1_OBJECT *algorithm; + ASN1_TYPE *parameter; +} /* X509_ALGOR */ ; + +typedef STACK_OF(X509_ALGOR) X509_ALGORS; + +typedef struct X509_val_st { + ASN1_TIME *notBefore; + ASN1_TIME *notAfter; +} X509_VAL; + +typedef struct X509_sig_st X509_SIG; + +typedef struct X509_name_entry_st X509_NAME_ENTRY; + +SKM_DEFINE_STACK_OF_INTERNAL(X509_NAME_ENTRY, X509_NAME_ENTRY, X509_NAME_ENTRY) +#define sk_X509_NAME_ENTRY_num(sk) OPENSSL_sk_num(ossl_check_const_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_value(sk, idx) ((X509_NAME_ENTRY *)OPENSSL_sk_value(ossl_check_const_X509_NAME_ENTRY_sk_type(sk), (idx))) +#define sk_X509_NAME_ENTRY_new(cmp) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new(ossl_check_X509_NAME_ENTRY_compfunc_type(cmp))) +#define sk_X509_NAME_ENTRY_new_null() ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new_null()) +#define sk_X509_NAME_ENTRY_new_reserve(cmp, n) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new_reserve(ossl_check_X509_NAME_ENTRY_compfunc_type(cmp), (n))) +#define sk_X509_NAME_ENTRY_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_NAME_ENTRY_sk_type(sk), (n)) +#define sk_X509_NAME_ENTRY_free(sk) OPENSSL_sk_free(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_zero(sk) OPENSSL_sk_zero(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_delete(sk, i) ((X509_NAME_ENTRY *)OPENSSL_sk_delete(ossl_check_X509_NAME_ENTRY_sk_type(sk), (i))) +#define sk_X509_NAME_ENTRY_delete_ptr(sk, ptr) ((X509_NAME_ENTRY *)OPENSSL_sk_delete_ptr(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr))) +#define sk_X509_NAME_ENTRY_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_pop(sk) ((X509_NAME_ENTRY *)OPENSSL_sk_pop(ossl_check_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_shift(sk) ((X509_NAME_ENTRY *)OPENSSL_sk_shift(ossl_check_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_NAME_ENTRY_sk_type(sk),ossl_check_X509_NAME_ENTRY_freefunc_type(freefunc)) +#define sk_X509_NAME_ENTRY_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr), (idx)) +#define sk_X509_NAME_ENTRY_set(sk, idx, ptr) ((X509_NAME_ENTRY *)OPENSSL_sk_set(ossl_check_X509_NAME_ENTRY_sk_type(sk), (idx), ossl_check_X509_NAME_ENTRY_type(ptr))) +#define sk_X509_NAME_ENTRY_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr), pnum) +#define sk_X509_NAME_ENTRY_sort(sk) OPENSSL_sk_sort(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_dup(sk) ((STACK_OF(X509_NAME_ENTRY) 
*)OPENSSL_sk_dup(ossl_check_const_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_copyfunc_type(copyfunc), ossl_check_X509_NAME_ENTRY_freefunc_type(freefunc))) +#define sk_X509_NAME_ENTRY_set_cmp_func(sk, cmp) ((sk_X509_NAME_ENTRY_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_compfunc_type(cmp))) + + +# define X509_EX_V_NETSCAPE_HACK 0x8000 +# define X509_EX_V_INIT 0x0001 +typedef struct X509_extension_st X509_EXTENSION; +SKM_DEFINE_STACK_OF_INTERNAL(X509_EXTENSION, X509_EXTENSION, X509_EXTENSION) +#define sk_X509_EXTENSION_num(sk) OPENSSL_sk_num(ossl_check_const_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_value(sk, idx) ((X509_EXTENSION *)OPENSSL_sk_value(ossl_check_const_X509_EXTENSION_sk_type(sk), (idx))) +#define sk_X509_EXTENSION_new(cmp) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new(ossl_check_X509_EXTENSION_compfunc_type(cmp))) +#define sk_X509_EXTENSION_new_null() ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new_null()) +#define sk_X509_EXTENSION_new_reserve(cmp, n) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new_reserve(ossl_check_X509_EXTENSION_compfunc_type(cmp), (n))) +#define sk_X509_EXTENSION_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_EXTENSION_sk_type(sk), (n)) +#define sk_X509_EXTENSION_free(sk) OPENSSL_sk_free(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_zero(sk) OPENSSL_sk_zero(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_delete(sk, i) ((X509_EXTENSION *)OPENSSL_sk_delete(ossl_check_X509_EXTENSION_sk_type(sk), (i))) +#define sk_X509_EXTENSION_delete_ptr(sk, ptr) ((X509_EXTENSION *)OPENSSL_sk_delete_ptr(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr))) +#define sk_X509_EXTENSION_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_pop(sk) ((X509_EXTENSION *)OPENSSL_sk_pop(ossl_check_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_shift(sk) ((X509_EXTENSION *)OPENSSL_sk_shift(ossl_check_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_EXTENSION_sk_type(sk),ossl_check_X509_EXTENSION_freefunc_type(freefunc)) +#define sk_X509_EXTENSION_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr), (idx)) +#define sk_X509_EXTENSION_set(sk, idx, ptr) ((X509_EXTENSION *)OPENSSL_sk_set(ossl_check_X509_EXTENSION_sk_type(sk), (idx), ossl_check_X509_EXTENSION_type(ptr))) +#define sk_X509_EXTENSION_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr), pnum) +#define sk_X509_EXTENSION_sort(sk) OPENSSL_sk_sort(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_dup(sk) ((STACK_OF(X509_EXTENSION) 
*)OPENSSL_sk_dup(ossl_check_const_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_copyfunc_type(copyfunc), ossl_check_X509_EXTENSION_freefunc_type(freefunc))) +#define sk_X509_EXTENSION_set_cmp_func(sk, cmp) ((sk_X509_EXTENSION_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_compfunc_type(cmp))) + +typedef STACK_OF(X509_EXTENSION) X509_EXTENSIONS; +typedef struct x509_attributes_st X509_ATTRIBUTE; +SKM_DEFINE_STACK_OF_INTERNAL(X509_ATTRIBUTE, X509_ATTRIBUTE, X509_ATTRIBUTE) +#define sk_X509_ATTRIBUTE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_value(sk, idx) ((X509_ATTRIBUTE *)OPENSSL_sk_value(ossl_check_const_X509_ATTRIBUTE_sk_type(sk), (idx))) +#define sk_X509_ATTRIBUTE_new(cmp) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new(ossl_check_X509_ATTRIBUTE_compfunc_type(cmp))) +#define sk_X509_ATTRIBUTE_new_null() ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new_null()) +#define sk_X509_ATTRIBUTE_new_reserve(cmp, n) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new_reserve(ossl_check_X509_ATTRIBUTE_compfunc_type(cmp), (n))) +#define sk_X509_ATTRIBUTE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_ATTRIBUTE_sk_type(sk), (n)) +#define sk_X509_ATTRIBUTE_free(sk) OPENSSL_sk_free(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_delete(sk, i) ((X509_ATTRIBUTE *)OPENSSL_sk_delete(ossl_check_X509_ATTRIBUTE_sk_type(sk), (i))) +#define sk_X509_ATTRIBUTE_delete_ptr(sk, ptr) ((X509_ATTRIBUTE *)OPENSSL_sk_delete_ptr(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr))) +#define sk_X509_ATTRIBUTE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_pop(sk) ((X509_ATTRIBUTE *)OPENSSL_sk_pop(ossl_check_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_shift(sk) ((X509_ATTRIBUTE *)OPENSSL_sk_shift(ossl_check_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_ATTRIBUTE_sk_type(sk),ossl_check_X509_ATTRIBUTE_freefunc_type(freefunc)) +#define sk_X509_ATTRIBUTE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr), (idx)) +#define sk_X509_ATTRIBUTE_set(sk, idx, ptr) ((X509_ATTRIBUTE *)OPENSSL_sk_set(ossl_check_X509_ATTRIBUTE_sk_type(sk), (idx), ossl_check_X509_ATTRIBUTE_type(ptr))) +#define sk_X509_ATTRIBUTE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr), pnum) +#define sk_X509_ATTRIBUTE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_dup(sk) ((STACK_OF(X509_ATTRIBUTE) 
*)OPENSSL_sk_dup(ossl_check_const_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_copyfunc_type(copyfunc), ossl_check_X509_ATTRIBUTE_freefunc_type(freefunc))) +#define sk_X509_ATTRIBUTE_set_cmp_func(sk, cmp) ((sk_X509_ATTRIBUTE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_compfunc_type(cmp))) + +typedef struct X509_req_info_st X509_REQ_INFO; +typedef struct X509_req_st X509_REQ; +typedef struct x509_cert_aux_st X509_CERT_AUX; +typedef struct x509_cinf_st X509_CINF; + +/* Flags for X509_print_ex() */ + +# define X509_FLAG_COMPAT 0 +# define X509_FLAG_NO_HEADER 1L +# define X509_FLAG_NO_VERSION (1L << 1) +# define X509_FLAG_NO_SERIAL (1L << 2) +# define X509_FLAG_NO_SIGNAME (1L << 3) +# define X509_FLAG_NO_ISSUER (1L << 4) +# define X509_FLAG_NO_VALIDITY (1L << 5) +# define X509_FLAG_NO_SUBJECT (1L << 6) +# define X509_FLAG_NO_PUBKEY (1L << 7) +# define X509_FLAG_NO_EXTENSIONS (1L << 8) +# define X509_FLAG_NO_SIGDUMP (1L << 9) +# define X509_FLAG_NO_AUX (1L << 10) +# define X509_FLAG_NO_ATTRIBUTES (1L << 11) +# define X509_FLAG_NO_IDS (1L << 12) +# define X509_FLAG_EXTENSIONS_ONLY_KID (1L << 13) + +/* Flags specific to X509_NAME_print_ex() */ + +/* The field separator information */ + +# define XN_FLAG_SEP_MASK (0xf << 16) + +# define XN_FLAG_COMPAT 0/* Traditional; use old X509_NAME_print */ +# define XN_FLAG_SEP_COMMA_PLUS (1 << 16)/* RFC2253 ,+ */ +# define XN_FLAG_SEP_CPLUS_SPC (2 << 16)/* ,+ spaced: more readable */ +# define XN_FLAG_SEP_SPLUS_SPC (3 << 16)/* ;+ spaced */ +# define XN_FLAG_SEP_MULTILINE (4 << 16)/* One line per field */ + +# define XN_FLAG_DN_REV (1 << 20)/* Reverse DN order */ + +/* How the field name is shown */ + +# define XN_FLAG_FN_MASK (0x3 << 21) + +# define XN_FLAG_FN_SN 0/* Object short name */ +# define XN_FLAG_FN_LN (1 << 21)/* Object long name */ +# define XN_FLAG_FN_OID (2 << 21)/* Always use OIDs */ +# define XN_FLAG_FN_NONE (3 << 21)/* No field names */ + +# define XN_FLAG_SPC_EQ (1 << 23)/* Put spaces round '=' */ + +/* + * This determines if we dump fields we don't recognise: RFC2253 requires + * this. 
+ */ + +# define XN_FLAG_DUMP_UNKNOWN_FIELDS (1 << 24) + +# define XN_FLAG_FN_ALIGN (1 << 25)/* Align field names to 20 + * characters */ + +/* Complete set of RFC2253 flags */ + +# define XN_FLAG_RFC2253 (ASN1_STRFLGS_RFC2253 | \ + XN_FLAG_SEP_COMMA_PLUS | \ + XN_FLAG_DN_REV | \ + XN_FLAG_FN_SN | \ + XN_FLAG_DUMP_UNKNOWN_FIELDS) + +/* readable oneline form */ + +# define XN_FLAG_ONELINE (ASN1_STRFLGS_RFC2253 | \ + ASN1_STRFLGS_ESC_QUOTE | \ + XN_FLAG_SEP_CPLUS_SPC | \ + XN_FLAG_SPC_EQ | \ + XN_FLAG_FN_SN) + +/* readable multiline form */ + +# define XN_FLAG_MULTILINE (ASN1_STRFLGS_ESC_CTRL | \ + ASN1_STRFLGS_ESC_MSB | \ + XN_FLAG_SEP_MULTILINE | \ + XN_FLAG_SPC_EQ | \ + XN_FLAG_FN_LN | \ + XN_FLAG_FN_ALIGN) + +typedef struct X509_crl_info_st X509_CRL_INFO; + +typedef struct private_key_st { + int version; + /* The PKCS#8 data types */ + X509_ALGOR *enc_algor; + ASN1_OCTET_STRING *enc_pkey; /* encrypted pub key */ + /* When decrypted, the following will not be NULL */ + EVP_PKEY *dec_pkey; + /* used to encrypt and decrypt */ + int key_length; + char *key_data; + int key_free; /* true if we should auto free key_data */ + /* expanded version of 'enc_algor' */ + EVP_CIPHER_INFO cipher; +} X509_PKEY; + +typedef struct X509_info_st { + X509 *x509; + X509_CRL *crl; + X509_PKEY *x_pkey; + EVP_CIPHER_INFO enc_cipher; + int enc_len; + char *enc_data; +} X509_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(X509_INFO, X509_INFO, X509_INFO) +#define sk_X509_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_value(sk, idx) ((X509_INFO *)OPENSSL_sk_value(ossl_check_const_X509_INFO_sk_type(sk), (idx))) +#define sk_X509_INFO_new(cmp) ((STACK_OF(X509_INFO) *)OPENSSL_sk_new(ossl_check_X509_INFO_compfunc_type(cmp))) +#define sk_X509_INFO_new_null() ((STACK_OF(X509_INFO) *)OPENSSL_sk_new_null()) +#define sk_X509_INFO_new_reserve(cmp, n) ((STACK_OF(X509_INFO) *)OPENSSL_sk_new_reserve(ossl_check_X509_INFO_compfunc_type(cmp), (n))) +#define sk_X509_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_INFO_sk_type(sk), (n)) +#define sk_X509_INFO_free(sk) OPENSSL_sk_free(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_delete(sk, i) ((X509_INFO *)OPENSSL_sk_delete(ossl_check_X509_INFO_sk_type(sk), (i))) +#define sk_X509_INFO_delete_ptr(sk, ptr) ((X509_INFO *)OPENSSL_sk_delete_ptr(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr))) +#define sk_X509_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_pop(sk) ((X509_INFO *)OPENSSL_sk_pop(ossl_check_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_shift(sk) ((X509_INFO *)OPENSSL_sk_shift(ossl_check_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_INFO_sk_type(sk),ossl_check_X509_INFO_freefunc_type(freefunc)) +#define sk_X509_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr), (idx)) +#define sk_X509_INFO_set(sk, idx, ptr) ((X509_INFO *)OPENSSL_sk_set(ossl_check_X509_INFO_sk_type(sk), (idx), ossl_check_X509_INFO_type(ptr))) +#define sk_X509_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_INFO_sk_type(sk), 
ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr), pnum) +#define sk_X509_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_dup(sk) ((STACK_OF(X509_INFO) *)OPENSSL_sk_dup(ossl_check_const_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_INFO_sk_type(sk), ossl_check_X509_INFO_copyfunc_type(copyfunc), ossl_check_X509_INFO_freefunc_type(freefunc))) +#define sk_X509_INFO_set_cmp_func(sk, cmp) ((sk_X509_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_compfunc_type(cmp))) + + +/* + * The next 2 structures and their 8 routines are used to manipulate Netscape's + * spki structures - useful if you are writing a CA web page + */ +typedef struct Netscape_spkac_st { + X509_PUBKEY *pubkey; + ASN1_IA5STRING *challenge; /* challenge sent in atlas >= PR2 */ +} NETSCAPE_SPKAC; + +typedef struct Netscape_spki_st { + NETSCAPE_SPKAC *spkac; /* signed public key and challenge */ + X509_ALGOR sig_algor; + ASN1_BIT_STRING *signature; +} NETSCAPE_SPKI; + +/* Netscape certificate sequence structure */ +typedef struct Netscape_certificate_sequence { + ASN1_OBJECT *type; + STACK_OF(X509) *certs; +} NETSCAPE_CERT_SEQUENCE; + +/*- Unused (and iv length is wrong) +typedef struct CBCParameter_st + { + unsigned char iv[8]; + } CBC_PARAM; +*/ + +/* Password based encryption structure */ + +typedef struct PBEPARAM_st { + ASN1_OCTET_STRING *salt; + ASN1_INTEGER *iter; +} PBEPARAM; + +/* Password based encryption V2 structures */ + +typedef struct PBE2PARAM_st { + X509_ALGOR *keyfunc; + X509_ALGOR *encryption; +} PBE2PARAM; + +typedef struct PBKDF2PARAM_st { +/* Usually OCTET STRING but could be anything */ + ASN1_TYPE *salt; + ASN1_INTEGER *iter; + ASN1_INTEGER *keylength; + X509_ALGOR *prf; +} PBKDF2PARAM; + +#ifndef OPENSSL_NO_SCRYPT +typedef struct SCRYPT_PARAMS_st { + ASN1_OCTET_STRING *salt; + ASN1_INTEGER *costParameter; + ASN1_INTEGER *blockSize; + ASN1_INTEGER *parallelizationParameter; + ASN1_INTEGER *keyLength; +} SCRYPT_PARAMS; +#endif + +#ifdef __cplusplus +} +#endif + +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# define X509_EXT_PACK_UNKNOWN 1 +# define X509_EXT_PACK_STRING 2 + +# define X509_extract_key(x) X509_get_pubkey(x)/*****/ +# define X509_REQ_extract_key(a) X509_REQ_get_pubkey(a) +# define X509_name_cmp(a,b) X509_NAME_cmp((a),(b)) + +void X509_CRL_set_default_method(const X509_CRL_METHOD *meth); +X509_CRL_METHOD *X509_CRL_METHOD_new(int (*crl_init) (X509_CRL *crl), + int (*crl_free) (X509_CRL *crl), + int (*crl_lookup) (X509_CRL *crl, + X509_REVOKED **ret, + const + ASN1_INTEGER *serial, + const + X509_NAME *issuer), + int (*crl_verify) (X509_CRL *crl, + EVP_PKEY *pk)); +void X509_CRL_METHOD_free(X509_CRL_METHOD *m); + +void X509_CRL_set_meth_data(X509_CRL *crl, void *dat); +void *X509_CRL_get_meth_data(X509_CRL *crl); + +const char *X509_verify_cert_error_string(long n); + +int X509_verify(X509 *a, EVP_PKEY *r); +int X509_self_signed(X509 *cert, int verify_signature); + +int X509_REQ_verify_ex(X509_REQ *a, EVP_PKEY *r, OSSL_LIB_CTX *libctx, + const char *propq); +int X509_REQ_verify(X509_REQ *a, EVP_PKEY *r); +int X509_CRL_verify(X509_CRL *a, EVP_PKEY *r); +int NETSCAPE_SPKI_verify(NETSCAPE_SPKI 
*a, EVP_PKEY *r); + +NETSCAPE_SPKI *NETSCAPE_SPKI_b64_decode(const char *str, int len); +char *NETSCAPE_SPKI_b64_encode(NETSCAPE_SPKI *x); +EVP_PKEY *NETSCAPE_SPKI_get_pubkey(NETSCAPE_SPKI *x); +int NETSCAPE_SPKI_set_pubkey(NETSCAPE_SPKI *x, EVP_PKEY *pkey); + +int NETSCAPE_SPKI_print(BIO *out, NETSCAPE_SPKI *spki); + +int X509_signature_dump(BIO *bp, const ASN1_STRING *sig, int indent); +int X509_signature_print(BIO *bp, const X509_ALGOR *alg, + const ASN1_STRING *sig); + +int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx); +int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx); +int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx); +int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md); + +int X509_pubkey_digest(const X509 *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_digest(const X509 *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +ASN1_OCTET_STRING *X509_digest_sig(const X509 *cert, + EVP_MD **md_used, int *md_is_fallback); +int X509_CRL_digest(const X509_CRL *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_REQ_digest(const X509_REQ *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_NAME_digest(const X509_NAME *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); + +X509 *X509_load_http(const char *url, BIO *bio, BIO *rbio, int timeout); +X509_CRL *X509_CRL_load_http(const char *url, BIO *bio, BIO *rbio, int timeout); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# include /* OSSL_HTTP_REQ_CTX_nbio_d2i */ +# define X509_http_nbio(rctx, pcert) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(rctx, pcert, ASN1_ITEM_rptr(X509)) +# define X509_CRL_http_nbio(rctx, pcrl) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(rctx, pcrl, ASN1_ITEM_rptr(X509_CRL)) +# endif + +# ifndef OPENSSL_NO_STDIO +X509 *d2i_X509_fp(FILE *fp, X509 **x509); +int i2d_X509_fp(FILE *fp, const X509 *x509); +X509_CRL *d2i_X509_CRL_fp(FILE *fp, X509_CRL **crl); +int i2d_X509_CRL_fp(FILE *fp, const X509_CRL *crl); +X509_REQ *d2i_X509_REQ_fp(FILE *fp, X509_REQ **req); +int i2d_X509_REQ_fp(FILE *fp, const X509_REQ *req); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPrivateKey_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPrivateKey_fp(FILE *fp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPublicKey_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPublicKey_fp(FILE *fp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSA_PUBKEY_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSA_PUBKEY_fp(FILE *fp, const RSA *rsa); +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSA_PUBKEY_fp(FILE *fp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSA_PUBKEY_fp(FILE *fp, const DSA *dsa); +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSAPrivateKey_fp(FILE *fp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSAPrivateKey_fp(FILE *fp, const DSA *dsa); +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_EC_PUBKEY_fp(FILE *fp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_EC_PUBKEY_fp(FILE *fp, const EC_KEY *eckey); +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_ECPrivateKey_fp(FILE *fp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_ECPrivateKey_fp(FILE *fp, const EC_KEY *eckey); +# endif /* OPENSSL_NO_EC */ +# endif /* 
OPENSSL_NO_DEPRECATED_3_0 */ +X509_SIG *d2i_PKCS8_fp(FILE *fp, X509_SIG **p8); +int i2d_PKCS8_fp(FILE *fp, const X509_SIG *p8); +X509_PUBKEY *d2i_X509_PUBKEY_fp(FILE *fp, X509_PUBKEY **xpk); +int i2d_X509_PUBKEY_fp(FILE *fp, const X509_PUBKEY *xpk); +PKCS8_PRIV_KEY_INFO *d2i_PKCS8_PRIV_KEY_INFO_fp(FILE *fp, + PKCS8_PRIV_KEY_INFO **p8inf); +int i2d_PKCS8_PRIV_KEY_INFO_fp(FILE *fp, const PKCS8_PRIV_KEY_INFO *p8inf); +int i2d_PKCS8PrivateKeyInfo_fp(FILE *fp, const EVP_PKEY *key); +int i2d_PrivateKey_fp(FILE *fp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PrivateKey_ex_fp(FILE *fp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PrivateKey_fp(FILE *fp, EVP_PKEY **a); +int i2d_PUBKEY_fp(FILE *fp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PUBKEY_ex_fp(FILE *fp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PUBKEY_fp(FILE *fp, EVP_PKEY **a); +# endif + +X509 *d2i_X509_bio(BIO *bp, X509 **x509); +int i2d_X509_bio(BIO *bp, const X509 *x509); +X509_CRL *d2i_X509_CRL_bio(BIO *bp, X509_CRL **crl); +int i2d_X509_CRL_bio(BIO *bp, const X509_CRL *crl); +X509_REQ *d2i_X509_REQ_bio(BIO *bp, X509_REQ **req); +int i2d_X509_REQ_bio(BIO *bp, const X509_REQ *req); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPrivateKey_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPrivateKey_bio(BIO *bp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPublicKey_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPublicKey_bio(BIO *bp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSA_PUBKEY_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSA_PUBKEY_bio(BIO *bp, const RSA *rsa); +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSA_PUBKEY_bio(BIO *bp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSA_PUBKEY_bio(BIO *bp, const DSA *dsa); +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSAPrivateKey_bio(BIO *bp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSAPrivateKey_bio(BIO *bp, const DSA *dsa); +# endif +# endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_EC_PUBKEY_bio(BIO *bp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_EC_PUBKEY_bio(BIO *bp, const EC_KEY *eckey); +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_ECPrivateKey_bio(BIO *bp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_ECPrivateKey_bio(BIO *bp, const EC_KEY *eckey); +# endif /* OPENSSL_NO_EC */ +# endif /* OPENSSL_NO_DEPRECATED_3_0 */ + +X509_SIG *d2i_PKCS8_bio(BIO *bp, X509_SIG **p8); +int i2d_PKCS8_bio(BIO *bp, const X509_SIG *p8); +X509_PUBKEY *d2i_X509_PUBKEY_bio(BIO *bp, X509_PUBKEY **xpk); +int i2d_X509_PUBKEY_bio(BIO *bp, const X509_PUBKEY *xpk); +PKCS8_PRIV_KEY_INFO *d2i_PKCS8_PRIV_KEY_INFO_bio(BIO *bp, + PKCS8_PRIV_KEY_INFO **p8inf); +int i2d_PKCS8_PRIV_KEY_INFO_bio(BIO *bp, const PKCS8_PRIV_KEY_INFO *p8inf); +int i2d_PKCS8PrivateKeyInfo_bio(BIO *bp, const EVP_PKEY *key); +int i2d_PrivateKey_bio(BIO *bp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PrivateKey_ex_bio(BIO *bp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PrivateKey_bio(BIO *bp, EVP_PKEY **a); +int i2d_PUBKEY_bio(BIO *bp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PUBKEY_ex_bio(BIO *bp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PUBKEY_bio(BIO *bp, EVP_PKEY **a); + +DECLARE_ASN1_DUP_FUNCTION(X509) +DECLARE_ASN1_DUP_FUNCTION(X509_ALGOR) +DECLARE_ASN1_DUP_FUNCTION(X509_ATTRIBUTE) +DECLARE_ASN1_DUP_FUNCTION(X509_CRL) +DECLARE_ASN1_DUP_FUNCTION(X509_EXTENSION) 
+DECLARE_ASN1_DUP_FUNCTION(X509_PUBKEY) +DECLARE_ASN1_DUP_FUNCTION(X509_REQ) +DECLARE_ASN1_DUP_FUNCTION(X509_REVOKED) +int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype, + void *pval); +void X509_ALGOR_get0(const ASN1_OBJECT **paobj, int *pptype, + const void **ppval, const X509_ALGOR *algor); +void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); +int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b); +int X509_ALGOR_copy(X509_ALGOR *dest, const X509_ALGOR *src); + +DECLARE_ASN1_DUP_FUNCTION(X509_NAME) +DECLARE_ASN1_DUP_FUNCTION(X509_NAME_ENTRY) + +int X509_cmp_time(const ASN1_TIME *s, time_t *t); +int X509_cmp_current_time(const ASN1_TIME *s); +int X509_cmp_timeframe(const X509_VERIFY_PARAM *vpm, + const ASN1_TIME *start, const ASN1_TIME *end); +ASN1_TIME *X509_time_adj(ASN1_TIME *s, long adj, time_t *t); +ASN1_TIME *X509_time_adj_ex(ASN1_TIME *s, + int offset_day, long offset_sec, time_t *t); +ASN1_TIME *X509_gmtime_adj(ASN1_TIME *s, long adj); + +const char *X509_get_default_cert_area(void); +const char *X509_get_default_cert_dir(void); +const char *X509_get_default_cert_file(void); +const char *X509_get_default_cert_dir_env(void); +const char *X509_get_default_cert_file_env(void); +const char *X509_get_default_private_dir(void); + +X509_REQ *X509_to_X509_REQ(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +X509 *X509_REQ_to_X509(X509_REQ *r, int days, EVP_PKEY *pkey); + +DECLARE_ASN1_FUNCTIONS(X509_ALGOR) +DECLARE_ASN1_ENCODE_FUNCTIONS(X509_ALGORS, X509_ALGORS, X509_ALGORS) +DECLARE_ASN1_FUNCTIONS(X509_VAL) + +DECLARE_ASN1_FUNCTIONS(X509_PUBKEY) + +X509_PUBKEY *X509_PUBKEY_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +int X509_PUBKEY_set(X509_PUBKEY **x, EVP_PKEY *pkey); +EVP_PKEY *X509_PUBKEY_get0(const X509_PUBKEY *key); +EVP_PKEY *X509_PUBKEY_get(const X509_PUBKEY *key); +int X509_get_pubkey_parameters(EVP_PKEY *pkey, STACK_OF(X509) *chain); +long X509_get_pathlen(X509 *x); +DECLARE_ASN1_ENCODE_FUNCTIONS_only(EVP_PKEY, PUBKEY) +EVP_PKEY *d2i_PUBKEY_ex(EVP_PKEY **a, const unsigned char **pp, long length, + OSSL_LIB_CTX *libctx, const char *propq); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0,RSA, RSA_PUBKEY) +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0,DSA, DSA_PUBKEY) +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0, EC_KEY, EC_PUBKEY) +# endif +# endif + +DECLARE_ASN1_FUNCTIONS(X509_SIG) +void X509_SIG_get0(const X509_SIG *sig, const X509_ALGOR **palg, + const ASN1_OCTET_STRING **pdigest); +void X509_SIG_getm(X509_SIG *sig, X509_ALGOR **palg, + ASN1_OCTET_STRING **pdigest); + +DECLARE_ASN1_FUNCTIONS(X509_REQ_INFO) +DECLARE_ASN1_FUNCTIONS(X509_REQ) +X509_REQ *X509_REQ_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +DECLARE_ASN1_FUNCTIONS(X509_ATTRIBUTE) +X509_ATTRIBUTE *X509_ATTRIBUTE_create(int nid, int atrtype, void *value); + +DECLARE_ASN1_FUNCTIONS(X509_EXTENSION) +DECLARE_ASN1_ENCODE_FUNCTIONS(X509_EXTENSIONS, X509_EXTENSIONS, X509_EXTENSIONS) + +DECLARE_ASN1_FUNCTIONS(X509_NAME_ENTRY) + +DECLARE_ASN1_FUNCTIONS(X509_NAME) + +int X509_NAME_set(X509_NAME **xn, const X509_NAME *name); + +DECLARE_ASN1_FUNCTIONS(X509_CINF) +DECLARE_ASN1_FUNCTIONS(X509) +X509 *X509_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +DECLARE_ASN1_FUNCTIONS(X509_CERT_AUX) + +#define X509_get_ex_new_index(l, p, newf, dupf, freef) \ + 
CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509, l, p, newf, dupf, freef) +int X509_set_ex_data(X509 *r, int idx, void *arg); +void *X509_get_ex_data(const X509 *r, int idx); +DECLARE_ASN1_ENCODE_FUNCTIONS_only(X509,X509_AUX) + +int i2d_re_X509_tbs(X509 *x, unsigned char **pp); + +int X509_SIG_INFO_get(const X509_SIG_INFO *siginf, int *mdnid, int *pknid, + int *secbits, uint32_t *flags); +void X509_SIG_INFO_set(X509_SIG_INFO *siginf, int mdnid, int pknid, + int secbits, uint32_t flags); + +int X509_get_signature_info(X509 *x, int *mdnid, int *pknid, int *secbits, + uint32_t *flags); + +void X509_get0_signature(const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg, const X509 *x); +int X509_get_signature_nid(const X509 *x); + +void X509_set0_distinguishing_id(X509 *x, ASN1_OCTET_STRING *d_id); +ASN1_OCTET_STRING *X509_get0_distinguishing_id(X509 *x); +void X509_REQ_set0_distinguishing_id(X509_REQ *x, ASN1_OCTET_STRING *d_id); +ASN1_OCTET_STRING *X509_REQ_get0_distinguishing_id(X509_REQ *x); + +int X509_alias_set1(X509 *x, const unsigned char *name, int len); +int X509_keyid_set1(X509 *x, const unsigned char *id, int len); +unsigned char *X509_alias_get0(X509 *x, int *len); +unsigned char *X509_keyid_get0(X509 *x, int *len); + +DECLARE_ASN1_FUNCTIONS(X509_REVOKED) +DECLARE_ASN1_FUNCTIONS(X509_CRL_INFO) +DECLARE_ASN1_FUNCTIONS(X509_CRL) +X509_CRL *X509_CRL_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +int X509_CRL_add0_revoked(X509_CRL *crl, X509_REVOKED *rev); +int X509_CRL_get0_by_serial(X509_CRL *crl, + X509_REVOKED **ret, const ASN1_INTEGER *serial); +int X509_CRL_get0_by_cert(X509_CRL *crl, X509_REVOKED **ret, X509 *x); + +X509_PKEY *X509_PKEY_new(void); +void X509_PKEY_free(X509_PKEY *a); + +DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKI) +DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKAC) +DECLARE_ASN1_FUNCTIONS(NETSCAPE_CERT_SEQUENCE) + +X509_INFO *X509_INFO_new(void); +void X509_INFO_free(X509_INFO *a); +char *X509_NAME_oneline(const X509_NAME *a, char *buf, int size); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *algor1, + ASN1_BIT_STRING *signature, char *data, EVP_PKEY *pkey); +OSSL_DEPRECATEDIN_3_0 +int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data, + unsigned char *md, unsigned int *len); +OSSL_DEPRECATEDIN_3_0 +int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, char *data, EVP_PKEY *pkey, + const EVP_MD *type); +#endif +int ASN1_item_digest(const ASN1_ITEM *it, const EVP_MD *type, void *data, + unsigned char *md, unsigned int *len); +int ASN1_item_verify(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + EVP_PKEY *pkey); +int ASN1_item_verify_ctx(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + EVP_MD_CTX *ctx); +int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, const void *data, + EVP_PKEY *pkey, const EVP_MD *md); +int ASN1_item_sign_ctx(const ASN1_ITEM *it, X509_ALGOR *algor1, + X509_ALGOR *algor2, ASN1_BIT_STRING *signature, + const void *data, EVP_MD_CTX *ctx); + +#define X509_VERSION_1 0 +#define X509_VERSION_2 1 +#define X509_VERSION_3 2 + +long X509_get_version(const X509 *x); +int X509_set_version(X509 *x, long version); +int X509_set_serialNumber(X509 *x, ASN1_INTEGER *serial); +ASN1_INTEGER *X509_get_serialNumber(X509 *x); +const ASN1_INTEGER *X509_get0_serialNumber(const X509 *x); +int 
X509_set_issuer_name(X509 *x, const X509_NAME *name); +X509_NAME *X509_get_issuer_name(const X509 *a); +int X509_set_subject_name(X509 *x, const X509_NAME *name); +X509_NAME *X509_get_subject_name(const X509 *a); +const ASN1_TIME * X509_get0_notBefore(const X509 *x); +ASN1_TIME *X509_getm_notBefore(const X509 *x); +int X509_set1_notBefore(X509 *x, const ASN1_TIME *tm); +const ASN1_TIME *X509_get0_notAfter(const X509 *x); +ASN1_TIME *X509_getm_notAfter(const X509 *x); +int X509_set1_notAfter(X509 *x, const ASN1_TIME *tm); +int X509_set_pubkey(X509 *x, EVP_PKEY *pkey); +int X509_up_ref(X509 *x); +int X509_get_signature_type(const X509 *x); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_get_notBefore X509_getm_notBefore +# define X509_get_notAfter X509_getm_notAfter +# define X509_set_notBefore X509_set1_notBefore +# define X509_set_notAfter X509_set1_notAfter +#endif + + +/* + * This one is only used so that a binary form can output, as in + * i2d_X509_PUBKEY(X509_get_X509_PUBKEY(x), &buf) + */ +X509_PUBKEY *X509_get_X509_PUBKEY(const X509 *x); +const STACK_OF(X509_EXTENSION) *X509_get0_extensions(const X509 *x); +void X509_get0_uids(const X509 *x, const ASN1_BIT_STRING **piuid, + const ASN1_BIT_STRING **psuid); +const X509_ALGOR *X509_get0_tbs_sigalg(const X509 *x); + +EVP_PKEY *X509_get0_pubkey(const X509 *x); +EVP_PKEY *X509_get_pubkey(X509 *x); +ASN1_BIT_STRING *X509_get0_pubkey_bitstr(const X509 *x); + +#define X509_REQ_VERSION_1 0 + +long X509_REQ_get_version(const X509_REQ *req); +int X509_REQ_set_version(X509_REQ *x, long version); +X509_NAME *X509_REQ_get_subject_name(const X509_REQ *req); +int X509_REQ_set_subject_name(X509_REQ *req, const X509_NAME *name); +void X509_REQ_get0_signature(const X509_REQ *req, const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg); +void X509_REQ_set0_signature(X509_REQ *req, ASN1_BIT_STRING *psig); +int X509_REQ_set1_signature_algo(X509_REQ *req, X509_ALGOR *palg); +int X509_REQ_get_signature_nid(const X509_REQ *req); +int i2d_re_X509_REQ_tbs(X509_REQ *req, unsigned char **pp); +int X509_REQ_set_pubkey(X509_REQ *x, EVP_PKEY *pkey); +EVP_PKEY *X509_REQ_get_pubkey(X509_REQ *req); +EVP_PKEY *X509_REQ_get0_pubkey(const X509_REQ *req); +X509_PUBKEY *X509_REQ_get_X509_PUBKEY(X509_REQ *req); +int X509_REQ_extension_nid(int nid); +int *X509_REQ_get_extension_nids(void); +void X509_REQ_set_extension_nids(int *nids); +STACK_OF(X509_EXTENSION) *X509_REQ_get_extensions(X509_REQ *req); +int X509_REQ_add_extensions_nid(X509_REQ *req, + const STACK_OF(X509_EXTENSION) *exts, int nid); +int X509_REQ_add_extensions(X509_REQ *req, const STACK_OF(X509_EXTENSION) *ext); +int X509_REQ_get_attr_count(const X509_REQ *req); +int X509_REQ_get_attr_by_NID(const X509_REQ *req, int nid, int lastpos); +int X509_REQ_get_attr_by_OBJ(const X509_REQ *req, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *X509_REQ_get_attr(const X509_REQ *req, int loc); +X509_ATTRIBUTE *X509_REQ_delete_attr(X509_REQ *req, int loc); +int X509_REQ_add1_attr(X509_REQ *req, X509_ATTRIBUTE *attr); +int X509_REQ_add1_attr_by_OBJ(X509_REQ *req, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len); +int X509_REQ_add1_attr_by_NID(X509_REQ *req, + int nid, int type, + const unsigned char *bytes, int len); +int X509_REQ_add1_attr_by_txt(X509_REQ *req, + const char *attrname, int type, + const unsigned char *bytes, int len); + +#define X509_CRL_VERSION_1 0 +#define X509_CRL_VERSION_2 1 + +int X509_CRL_set_version(X509_CRL *x, long version); +int 
X509_CRL_set_issuer_name(X509_CRL *x, const X509_NAME *name); +int X509_CRL_set1_lastUpdate(X509_CRL *x, const ASN1_TIME *tm); +int X509_CRL_set1_nextUpdate(X509_CRL *x, const ASN1_TIME *tm); +int X509_CRL_sort(X509_CRL *crl); +int X509_CRL_up_ref(X509_CRL *crl); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_CRL_set_lastUpdate X509_CRL_set1_lastUpdate +# define X509_CRL_set_nextUpdate X509_CRL_set1_nextUpdate +#endif + +long X509_CRL_get_version(const X509_CRL *crl); +const ASN1_TIME *X509_CRL_get0_lastUpdate(const X509_CRL *crl); +const ASN1_TIME *X509_CRL_get0_nextUpdate(const X509_CRL *crl); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 ASN1_TIME *X509_CRL_get_lastUpdate(X509_CRL *crl); +OSSL_DEPRECATEDIN_1_1_0 ASN1_TIME *X509_CRL_get_nextUpdate(X509_CRL *crl); +#endif +X509_NAME *X509_CRL_get_issuer(const X509_CRL *crl); +const STACK_OF(X509_EXTENSION) *X509_CRL_get0_extensions(const X509_CRL *crl); +STACK_OF(X509_REVOKED) *X509_CRL_get_REVOKED(X509_CRL *crl); +void X509_CRL_get0_signature(const X509_CRL *crl, const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg); +int X509_CRL_get_signature_nid(const X509_CRL *crl); +int i2d_re_X509_CRL_tbs(X509_CRL *req, unsigned char **pp); + +const ASN1_INTEGER *X509_REVOKED_get0_serialNumber(const X509_REVOKED *x); +int X509_REVOKED_set_serialNumber(X509_REVOKED *x, ASN1_INTEGER *serial); +const ASN1_TIME *X509_REVOKED_get0_revocationDate(const X509_REVOKED *x); +int X509_REVOKED_set_revocationDate(X509_REVOKED *r, ASN1_TIME *tm); +const STACK_OF(X509_EXTENSION) * +X509_REVOKED_get0_extensions(const X509_REVOKED *r); + +X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer, + EVP_PKEY *skey, const EVP_MD *md, unsigned int flags); + +int X509_REQ_check_private_key(const X509_REQ *req, EVP_PKEY *pkey); + +int X509_check_private_key(const X509 *cert, const EVP_PKEY *pkey); +int X509_chain_check_suiteb(int *perror_depth, + X509 *x, STACK_OF(X509) *chain, + unsigned long flags); +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags); +void OSSL_STACK_OF_X509_free(STACK_OF(X509) *certs); +STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain); + +int X509_issuer_and_serial_cmp(const X509 *a, const X509 *b); +unsigned long X509_issuer_and_serial_hash(X509 *a); + +int X509_issuer_name_cmp(const X509 *a, const X509 *b); +unsigned long X509_issuer_name_hash(X509 *a); + +int X509_subject_name_cmp(const X509 *a, const X509 *b); +unsigned long X509_subject_name_hash(X509 *x); + +# ifndef OPENSSL_NO_MD5 +unsigned long X509_issuer_name_hash_old(X509 *a); +unsigned long X509_subject_name_hash_old(X509 *x); +# endif + +# define X509_ADD_FLAG_DEFAULT 0 +# define X509_ADD_FLAG_UP_REF 0x1 +# define X509_ADD_FLAG_PREPEND 0x2 +# define X509_ADD_FLAG_NO_DUP 0x4 +# define X509_ADD_FLAG_NO_SS 0x8 +int X509_add_cert(STACK_OF(X509) *sk, X509 *cert, int flags); +int X509_add_certs(STACK_OF(X509) *sk, STACK_OF(X509) *certs, int flags); + +int X509_cmp(const X509 *a, const X509 *b); +int X509_NAME_cmp(const X509_NAME *a, const X509_NAME *b); +#ifndef OPENSSL_NO_DEPRECATED_3_0 +# define X509_NAME_hash(x) X509_NAME_hash_ex(x, NULL, NULL, NULL) +OSSL_DEPRECATEDIN_3_0 int X509_certificate_type(const X509 *x, + const EVP_PKEY *pubkey); +#endif +unsigned long X509_NAME_hash_ex(const X509_NAME *x, OSSL_LIB_CTX *libctx, + const char *propq, int *ok); +unsigned long X509_NAME_hash_old(const X509_NAME *x); + +int X509_CRL_cmp(const X509_CRL *a, const X509_CRL *b); +int X509_CRL_match(const X509_CRL *a, const X509_CRL *b); +int 
X509_aux_print(BIO *out, X509 *x, int indent); +# ifndef OPENSSL_NO_STDIO +int X509_print_ex_fp(FILE *bp, X509 *x, unsigned long nmflag, + unsigned long cflag); +int X509_print_fp(FILE *bp, X509 *x); +int X509_CRL_print_fp(FILE *bp, X509_CRL *x); +int X509_REQ_print_fp(FILE *bp, X509_REQ *req); +int X509_NAME_print_ex_fp(FILE *fp, const X509_NAME *nm, int indent, + unsigned long flags); +# endif + +int X509_NAME_print(BIO *bp, const X509_NAME *name, int obase); +int X509_NAME_print_ex(BIO *out, const X509_NAME *nm, int indent, + unsigned long flags); +int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflag, + unsigned long cflag); +int X509_print(BIO *bp, X509 *x); +int X509_ocspid_print(BIO *bp, X509 *x); +int X509_CRL_print_ex(BIO *out, X509_CRL *x, unsigned long nmflag); +int X509_CRL_print(BIO *bp, X509_CRL *x); +int X509_REQ_print_ex(BIO *bp, X509_REQ *x, unsigned long nmflag, + unsigned long cflag); +int X509_REQ_print(BIO *bp, X509_REQ *req); + +int X509_NAME_entry_count(const X509_NAME *name); +int X509_NAME_get_text_by_NID(const X509_NAME *name, int nid, + char *buf, int len); +int X509_NAME_get_text_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj, + char *buf, int len); + +/* + * NOTE: you should be passing -1, not 0 as lastpos. The functions that use + * lastpos, search after that position on. + */ +int X509_NAME_get_index_by_NID(const X509_NAME *name, int nid, int lastpos); +int X509_NAME_get_index_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj, + int lastpos); +X509_NAME_ENTRY *X509_NAME_get_entry(const X509_NAME *name, int loc); +X509_NAME_ENTRY *X509_NAME_delete_entry(X509_NAME *name, int loc); +int X509_NAME_add_entry(X509_NAME *name, const X509_NAME_ENTRY *ne, + int loc, int set); +int X509_NAME_add_entry_by_OBJ(X509_NAME *name, const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len, int loc, + int set); +int X509_NAME_add_entry_by_NID(X509_NAME *name, int nid, int type, + const unsigned char *bytes, int len, int loc, + int set); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_txt(X509_NAME_ENTRY **ne, + const char *field, int type, + const unsigned char *bytes, + int len); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_NID(X509_NAME_ENTRY **ne, int nid, + int type, + const unsigned char *bytes, + int len); +int X509_NAME_add_entry_by_txt(X509_NAME *name, const char *field, int type, + const unsigned char *bytes, int len, int loc, + int set); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_OBJ(X509_NAME_ENTRY **ne, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, + int len); +int X509_NAME_ENTRY_set_object(X509_NAME_ENTRY *ne, const ASN1_OBJECT *obj); +int X509_NAME_ENTRY_set_data(X509_NAME_ENTRY *ne, int type, + const unsigned char *bytes, int len); +ASN1_OBJECT *X509_NAME_ENTRY_get_object(const X509_NAME_ENTRY *ne); +ASN1_STRING * X509_NAME_ENTRY_get_data(const X509_NAME_ENTRY *ne); +int X509_NAME_ENTRY_set(const X509_NAME_ENTRY *ne); + +int X509_NAME_get0_der(const X509_NAME *nm, const unsigned char **pder, + size_t *pderlen); + +int X509v3_get_ext_count(const STACK_OF(X509_EXTENSION) *x); +int X509v3_get_ext_by_NID(const STACK_OF(X509_EXTENSION) *x, + int nid, int lastpos); +int X509v3_get_ext_by_OBJ(const STACK_OF(X509_EXTENSION) *x, + const ASN1_OBJECT *obj, int lastpos); +int X509v3_get_ext_by_critical(const STACK_OF(X509_EXTENSION) *x, + int crit, int lastpos); +X509_EXTENSION *X509v3_get_ext(const STACK_OF(X509_EXTENSION) *x, int loc); +X509_EXTENSION *X509v3_delete_ext(STACK_OF(X509_EXTENSION) *x, int loc); 
+STACK_OF(X509_EXTENSION) *X509v3_add_ext(STACK_OF(X509_EXTENSION) **x, + X509_EXTENSION *ex, int loc); + +int X509_get_ext_count(const X509 *x); +int X509_get_ext_by_NID(const X509 *x, int nid, int lastpos); +int X509_get_ext_by_OBJ(const X509 *x, const ASN1_OBJECT *obj, int lastpos); +int X509_get_ext_by_critical(const X509 *x, int crit, int lastpos); +X509_EXTENSION *X509_get_ext(const X509 *x, int loc); +X509_EXTENSION *X509_delete_ext(X509 *x, int loc); +int X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc); +void *X509_get_ext_d2i(const X509 *x, int nid, int *crit, int *idx); +int X509_add1_ext_i2d(X509 *x, int nid, void *value, int crit, + unsigned long flags); + +int X509_CRL_get_ext_count(const X509_CRL *x); +int X509_CRL_get_ext_by_NID(const X509_CRL *x, int nid, int lastpos); +int X509_CRL_get_ext_by_OBJ(const X509_CRL *x, const ASN1_OBJECT *obj, + int lastpos); +int X509_CRL_get_ext_by_critical(const X509_CRL *x, int crit, int lastpos); +X509_EXTENSION *X509_CRL_get_ext(const X509_CRL *x, int loc); +X509_EXTENSION *X509_CRL_delete_ext(X509_CRL *x, int loc); +int X509_CRL_add_ext(X509_CRL *x, X509_EXTENSION *ex, int loc); +void *X509_CRL_get_ext_d2i(const X509_CRL *x, int nid, int *crit, int *idx); +int X509_CRL_add1_ext_i2d(X509_CRL *x, int nid, void *value, int crit, + unsigned long flags); + +int X509_REVOKED_get_ext_count(const X509_REVOKED *x); +int X509_REVOKED_get_ext_by_NID(const X509_REVOKED *x, int nid, int lastpos); +int X509_REVOKED_get_ext_by_OBJ(const X509_REVOKED *x, const ASN1_OBJECT *obj, + int lastpos); +int X509_REVOKED_get_ext_by_critical(const X509_REVOKED *x, int crit, + int lastpos); +X509_EXTENSION *X509_REVOKED_get_ext(const X509_REVOKED *x, int loc); +X509_EXTENSION *X509_REVOKED_delete_ext(X509_REVOKED *x, int loc); +int X509_REVOKED_add_ext(X509_REVOKED *x, X509_EXTENSION *ex, int loc); +void *X509_REVOKED_get_ext_d2i(const X509_REVOKED *x, int nid, int *crit, + int *idx); +int X509_REVOKED_add1_ext_i2d(X509_REVOKED *x, int nid, void *value, int crit, + unsigned long flags); + +X509_EXTENSION *X509_EXTENSION_create_by_NID(X509_EXTENSION **ex, + int nid, int crit, + ASN1_OCTET_STRING *data); +X509_EXTENSION *X509_EXTENSION_create_by_OBJ(X509_EXTENSION **ex, + const ASN1_OBJECT *obj, int crit, + ASN1_OCTET_STRING *data); +int X509_EXTENSION_set_object(X509_EXTENSION *ex, const ASN1_OBJECT *obj); +int X509_EXTENSION_set_critical(X509_EXTENSION *ex, int crit); +int X509_EXTENSION_set_data(X509_EXTENSION *ex, ASN1_OCTET_STRING *data); +ASN1_OBJECT *X509_EXTENSION_get_object(X509_EXTENSION *ex); +ASN1_OCTET_STRING *X509_EXTENSION_get_data(X509_EXTENSION *ne); +int X509_EXTENSION_get_critical(const X509_EXTENSION *ex); + +int X509at_get_attr_count(const STACK_OF(X509_ATTRIBUTE) *x); +int X509at_get_attr_by_NID(const STACK_OF(X509_ATTRIBUTE) *x, int nid, + int lastpos); +int X509at_get_attr_by_OBJ(const STACK_OF(X509_ATTRIBUTE) *sk, + const ASN1_OBJECT *obj, int lastpos); +X509_ATTRIBUTE *X509at_get_attr(const STACK_OF(X509_ATTRIBUTE) *x, int loc); +X509_ATTRIBUTE *X509at_delete_attr(STACK_OF(X509_ATTRIBUTE) *x, int loc); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr(STACK_OF(X509_ATTRIBUTE) **x, + X509_ATTRIBUTE *attr); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_OBJ(STACK_OF(X509_ATTRIBUTE) + **x, const ASN1_OBJECT *obj, + int type, + const unsigned char *bytes, + int len); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_NID(STACK_OF(X509_ATTRIBUTE) + **x, int nid, int type, + const unsigned char *bytes, + int len); +STACK_OF(X509_ATTRIBUTE) 
*X509at_add1_attr_by_txt(STACK_OF(X509_ATTRIBUTE) + **x, const char *attrname, + int type, + const unsigned char *bytes, + int len); +void *X509at_get0_data_by_OBJ(const STACK_OF(X509_ATTRIBUTE) *x, + const ASN1_OBJECT *obj, int lastpos, int type); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_NID(X509_ATTRIBUTE **attr, int nid, + int atrtype, const void *data, + int len); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_OBJ(X509_ATTRIBUTE **attr, + const ASN1_OBJECT *obj, + int atrtype, const void *data, + int len); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_txt(X509_ATTRIBUTE **attr, + const char *atrname, int type, + const unsigned char *bytes, + int len); +int X509_ATTRIBUTE_set1_object(X509_ATTRIBUTE *attr, const ASN1_OBJECT *obj); +int X509_ATTRIBUTE_set1_data(X509_ATTRIBUTE *attr, int attrtype, + const void *data, int len); +void *X509_ATTRIBUTE_get0_data(X509_ATTRIBUTE *attr, int idx, int atrtype, + void *data); +int X509_ATTRIBUTE_count(const X509_ATTRIBUTE *attr); +ASN1_OBJECT *X509_ATTRIBUTE_get0_object(X509_ATTRIBUTE *attr); +ASN1_TYPE *X509_ATTRIBUTE_get0_type(X509_ATTRIBUTE *attr, int idx); + +int EVP_PKEY_get_attr_count(const EVP_PKEY *key); +int EVP_PKEY_get_attr_by_NID(const EVP_PKEY *key, int nid, int lastpos); +int EVP_PKEY_get_attr_by_OBJ(const EVP_PKEY *key, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *EVP_PKEY_get_attr(const EVP_PKEY *key, int loc); +X509_ATTRIBUTE *EVP_PKEY_delete_attr(EVP_PKEY *key, int loc); +int EVP_PKEY_add1_attr(EVP_PKEY *key, X509_ATTRIBUTE *attr); +int EVP_PKEY_add1_attr_by_OBJ(EVP_PKEY *key, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len); +int EVP_PKEY_add1_attr_by_NID(EVP_PKEY *key, + int nid, int type, + const unsigned char *bytes, int len); +int EVP_PKEY_add1_attr_by_txt(EVP_PKEY *key, + const char *attrname, int type, + const unsigned char *bytes, int len); + +/* lookup a cert from a X509 STACK */ +X509 *X509_find_by_issuer_and_serial(STACK_OF(X509) *sk, const X509_NAME *name, + const ASN1_INTEGER *serial); +X509 *X509_find_by_subject(STACK_OF(X509) *sk, const X509_NAME *name); + +DECLARE_ASN1_FUNCTIONS(PBEPARAM) +DECLARE_ASN1_FUNCTIONS(PBE2PARAM) +DECLARE_ASN1_FUNCTIONS(PBKDF2PARAM) +#ifndef OPENSSL_NO_SCRYPT +DECLARE_ASN1_FUNCTIONS(SCRYPT_PARAMS) +#endif + +int PKCS5_pbe_set0_algor(X509_ALGOR *algor, int alg, int iter, + const unsigned char *salt, int saltlen); +int PKCS5_pbe_set0_algor_ex(X509_ALGOR *algor, int alg, int iter, + const unsigned char *salt, int saltlen, + OSSL_LIB_CTX *libctx); + +X509_ALGOR *PKCS5_pbe_set(int alg, int iter, + const unsigned char *salt, int saltlen); +X509_ALGOR *PKCS5_pbe_set_ex(int alg, int iter, + const unsigned char *salt, int saltlen, + OSSL_LIB_CTX *libctx); + +X509_ALGOR *PKCS5_pbe2_set(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen); +X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen, + unsigned char *aiv, int prf_nid); +X509_ALGOR *PKCS5_pbe2_set_iv_ex(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen, + unsigned char *aiv, int prf_nid, + OSSL_LIB_CTX *libctx); + +#ifndef OPENSSL_NO_SCRYPT +X509_ALGOR *PKCS5_pbe2_set_scrypt(const EVP_CIPHER *cipher, + const unsigned char *salt, int saltlen, + unsigned char *aiv, uint64_t N, uint64_t r, + uint64_t p); +#endif + +X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen); +X509_ALGOR *PKCS5_pbkdf2_set_ex(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen, + 
OSSL_LIB_CTX *libctx); + +/* PKCS#8 utilities */ + +DECLARE_ASN1_FUNCTIONS(PKCS8_PRIV_KEY_INFO) + +EVP_PKEY *EVP_PKCS82PKEY(const PKCS8_PRIV_KEY_INFO *p8); +EVP_PKEY *EVP_PKCS82PKEY_ex(const PKCS8_PRIV_KEY_INFO *p8, OSSL_LIB_CTX *libctx, + const char *propq); +PKCS8_PRIV_KEY_INFO *EVP_PKEY2PKCS8(const EVP_PKEY *pkey); + +int PKCS8_pkey_set0(PKCS8_PRIV_KEY_INFO *priv, ASN1_OBJECT *aobj, + int version, int ptype, void *pval, + unsigned char *penc, int penclen); +int PKCS8_pkey_get0(const ASN1_OBJECT **ppkalg, + const unsigned char **pk, int *ppklen, + const X509_ALGOR **pa, const PKCS8_PRIV_KEY_INFO *p8); + +const STACK_OF(X509_ATTRIBUTE) * +PKCS8_pkey_get0_attrs(const PKCS8_PRIV_KEY_INFO *p8); +int PKCS8_pkey_add1_attr(PKCS8_PRIV_KEY_INFO *p8, X509_ATTRIBUTE *attr); +int PKCS8_pkey_add1_attr_by_NID(PKCS8_PRIV_KEY_INFO *p8, int nid, int type, + const unsigned char *bytes, int len); +int PKCS8_pkey_add1_attr_by_OBJ(PKCS8_PRIV_KEY_INFO *p8, const ASN1_OBJECT *obj, + int type, const unsigned char *bytes, int len); + + +void X509_PUBKEY_set0_public_key(X509_PUBKEY *pub, + unsigned char *penc, int penclen); +int X509_PUBKEY_set0_param(X509_PUBKEY *pub, ASN1_OBJECT *aobj, + int ptype, void *pval, + unsigned char *penc, int penclen); +int X509_PUBKEY_get0_param(ASN1_OBJECT **ppkalg, + const unsigned char **pk, int *ppklen, + X509_ALGOR **pa, const X509_PUBKEY *pub); +int X509_PUBKEY_eq(const X509_PUBKEY *a, const X509_PUBKEY *b); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509_vfy.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509_vfy.h new file mode 100644 index 00000000000..d3dfff8c0d9 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509_vfy.h @@ -0,0 +1,901 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509_vfy.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509_VFY_H +# define OPENSSL_X509_VFY_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509_VFY_H +# endif + +/* + * Protect against recursion, x509.h and x509_vfy.h each include the other. + */ +# ifndef OPENSSL_X509_H +# include +# endif + +# include +# include +# include +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +/*- +SSL_CTX -> X509_STORE + -> X509_LOOKUP + ->X509_LOOKUP_METHOD + -> X509_LOOKUP + ->X509_LOOKUP_METHOD + +SSL -> X509_STORE_CTX + ->X509_STORE + +The X509_STORE holds the tables etc for verification stuff. +A X509_STORE_CTX is used while validating a single certificate. +The X509_STORE has X509_LOOKUPs for looking up certs. +The X509_STORE then calls a function to actually verify the +certificate chain. 
+*/ + +typedef enum { + X509_LU_NONE = 0, + X509_LU_X509, X509_LU_CRL +} X509_LOOKUP_TYPE; + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +#define X509_LU_RETRY -1 +#define X509_LU_FAIL 0 +#endif + +SKM_DEFINE_STACK_OF_INTERNAL(X509_LOOKUP, X509_LOOKUP, X509_LOOKUP) +#define sk_X509_LOOKUP_num(sk) OPENSSL_sk_num(ossl_check_const_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_value(sk, idx) ((X509_LOOKUP *)OPENSSL_sk_value(ossl_check_const_X509_LOOKUP_sk_type(sk), (idx))) +#define sk_X509_LOOKUP_new(cmp) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new(ossl_check_X509_LOOKUP_compfunc_type(cmp))) +#define sk_X509_LOOKUP_new_null() ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new_null()) +#define sk_X509_LOOKUP_new_reserve(cmp, n) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new_reserve(ossl_check_X509_LOOKUP_compfunc_type(cmp), (n))) +#define sk_X509_LOOKUP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_LOOKUP_sk_type(sk), (n)) +#define sk_X509_LOOKUP_free(sk) OPENSSL_sk_free(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_zero(sk) OPENSSL_sk_zero(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_delete(sk, i) ((X509_LOOKUP *)OPENSSL_sk_delete(ossl_check_X509_LOOKUP_sk_type(sk), (i))) +#define sk_X509_LOOKUP_delete_ptr(sk, ptr) ((X509_LOOKUP *)OPENSSL_sk_delete_ptr(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr))) +#define sk_X509_LOOKUP_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_pop(sk) ((X509_LOOKUP *)OPENSSL_sk_pop(ossl_check_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_shift(sk) ((X509_LOOKUP *)OPENSSL_sk_shift(ossl_check_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_LOOKUP_sk_type(sk),ossl_check_X509_LOOKUP_freefunc_type(freefunc)) +#define sk_X509_LOOKUP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr), (idx)) +#define sk_X509_LOOKUP_set(sk, idx, ptr) ((X509_LOOKUP *)OPENSSL_sk_set(ossl_check_X509_LOOKUP_sk_type(sk), (idx), ossl_check_X509_LOOKUP_type(ptr))) +#define sk_X509_LOOKUP_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr), pnum) +#define sk_X509_LOOKUP_sort(sk) OPENSSL_sk_sort(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_dup(sk) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_dup(ossl_check_const_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_copyfunc_type(copyfunc), ossl_check_X509_LOOKUP_freefunc_type(freefunc))) +#define sk_X509_LOOKUP_set_cmp_func(sk, cmp) ((sk_X509_LOOKUP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_OBJECT, X509_OBJECT, X509_OBJECT) +#define sk_X509_OBJECT_num(sk) OPENSSL_sk_num(ossl_check_const_X509_OBJECT_sk_type(sk)) +#define 
sk_X509_OBJECT_value(sk, idx) ((X509_OBJECT *)OPENSSL_sk_value(ossl_check_const_X509_OBJECT_sk_type(sk), (idx))) +#define sk_X509_OBJECT_new(cmp) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new(ossl_check_X509_OBJECT_compfunc_type(cmp))) +#define sk_X509_OBJECT_new_null() ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new_null()) +#define sk_X509_OBJECT_new_reserve(cmp, n) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new_reserve(ossl_check_X509_OBJECT_compfunc_type(cmp), (n))) +#define sk_X509_OBJECT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_OBJECT_sk_type(sk), (n)) +#define sk_X509_OBJECT_free(sk) OPENSSL_sk_free(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_zero(sk) OPENSSL_sk_zero(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_delete(sk, i) ((X509_OBJECT *)OPENSSL_sk_delete(ossl_check_X509_OBJECT_sk_type(sk), (i))) +#define sk_X509_OBJECT_delete_ptr(sk, ptr) ((X509_OBJECT *)OPENSSL_sk_delete_ptr(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr))) +#define sk_X509_OBJECT_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_pop(sk) ((X509_OBJECT *)OPENSSL_sk_pop(ossl_check_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_shift(sk) ((X509_OBJECT *)OPENSSL_sk_shift(ossl_check_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_OBJECT_sk_type(sk),ossl_check_X509_OBJECT_freefunc_type(freefunc)) +#define sk_X509_OBJECT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr), (idx)) +#define sk_X509_OBJECT_set(sk, idx, ptr) ((X509_OBJECT *)OPENSSL_sk_set(ossl_check_X509_OBJECT_sk_type(sk), (idx), ossl_check_X509_OBJECT_type(ptr))) +#define sk_X509_OBJECT_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr), pnum) +#define sk_X509_OBJECT_sort(sk) OPENSSL_sk_sort(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_dup(sk) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_dup(ossl_check_const_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_copyfunc_type(copyfunc), ossl_check_X509_OBJECT_freefunc_type(freefunc))) +#define sk_X509_OBJECT_set_cmp_func(sk, cmp) ((sk_X509_OBJECT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_VERIFY_PARAM, X509_VERIFY_PARAM, X509_VERIFY_PARAM) +#define sk_X509_VERIFY_PARAM_num(sk) OPENSSL_sk_num(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_value(sk, idx) ((X509_VERIFY_PARAM *)OPENSSL_sk_value(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk), (idx))) +#define sk_X509_VERIFY_PARAM_new(cmp) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_new(ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp))) +#define sk_X509_VERIFY_PARAM_new_null() ((STACK_OF(X509_VERIFY_PARAM) 
*)OPENSSL_sk_new_null()) +#define sk_X509_VERIFY_PARAM_new_reserve(cmp, n) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_new_reserve(ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp), (n))) +#define sk_X509_VERIFY_PARAM_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (n)) +#define sk_X509_VERIFY_PARAM_free(sk) OPENSSL_sk_free(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_zero(sk) OPENSSL_sk_zero(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_delete(sk, i) ((X509_VERIFY_PARAM *)OPENSSL_sk_delete(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (i))) +#define sk_X509_VERIFY_PARAM_delete_ptr(sk, ptr) ((X509_VERIFY_PARAM *)OPENSSL_sk_delete_ptr(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr))) +#define sk_X509_VERIFY_PARAM_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_pop(sk) ((X509_VERIFY_PARAM *)OPENSSL_sk_pop(ossl_check_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_shift(sk) ((X509_VERIFY_PARAM *)OPENSSL_sk_shift(ossl_check_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_VERIFY_PARAM_sk_type(sk),ossl_check_X509_VERIFY_PARAM_freefunc_type(freefunc)) +#define sk_X509_VERIFY_PARAM_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr), (idx)) +#define sk_X509_VERIFY_PARAM_set(sk, idx, ptr) ((X509_VERIFY_PARAM *)OPENSSL_sk_set(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (idx), ossl_check_X509_VERIFY_PARAM_type(ptr))) +#define sk_X509_VERIFY_PARAM_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr), pnum) +#define sk_X509_VERIFY_PARAM_sort(sk) OPENSSL_sk_sort(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_dup(sk) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_dup(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_copyfunc_type(copyfunc), ossl_check_X509_VERIFY_PARAM_freefunc_type(freefunc))) +#define sk_X509_VERIFY_PARAM_set_cmp_func(sk, cmp) ((sk_X509_VERIFY_PARAM_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp))) + + +/* This is used for a table of trust checking functions */ +typedef struct x509_trust_st { + int trust; + int flags; + int (*check_trust) (struct x509_trust_st *, X509 *, int); + char *name; + int arg1; + void *arg2; +} X509_TRUST; +SKM_DEFINE_STACK_OF_INTERNAL(X509_TRUST, X509_TRUST, X509_TRUST) +#define sk_X509_TRUST_num(sk) OPENSSL_sk_num(ossl_check_const_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_value(sk, idx) ((X509_TRUST 
*)OPENSSL_sk_value(ossl_check_const_X509_TRUST_sk_type(sk), (idx))) +#define sk_X509_TRUST_new(cmp) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new(ossl_check_X509_TRUST_compfunc_type(cmp))) +#define sk_X509_TRUST_new_null() ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new_null()) +#define sk_X509_TRUST_new_reserve(cmp, n) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new_reserve(ossl_check_X509_TRUST_compfunc_type(cmp), (n))) +#define sk_X509_TRUST_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_TRUST_sk_type(sk), (n)) +#define sk_X509_TRUST_free(sk) OPENSSL_sk_free(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_zero(sk) OPENSSL_sk_zero(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_delete(sk, i) ((X509_TRUST *)OPENSSL_sk_delete(ossl_check_X509_TRUST_sk_type(sk), (i))) +#define sk_X509_TRUST_delete_ptr(sk, ptr) ((X509_TRUST *)OPENSSL_sk_delete_ptr(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr))) +#define sk_X509_TRUST_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_pop(sk) ((X509_TRUST *)OPENSSL_sk_pop(ossl_check_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_shift(sk) ((X509_TRUST *)OPENSSL_sk_shift(ossl_check_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_TRUST_sk_type(sk),ossl_check_X509_TRUST_freefunc_type(freefunc)) +#define sk_X509_TRUST_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr), (idx)) +#define sk_X509_TRUST_set(sk, idx, ptr) ((X509_TRUST *)OPENSSL_sk_set(ossl_check_X509_TRUST_sk_type(sk), (idx), ossl_check_X509_TRUST_type(ptr))) +#define sk_X509_TRUST_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr), pnum) +#define sk_X509_TRUST_sort(sk) OPENSSL_sk_sort(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_dup(sk) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_dup(ossl_check_const_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_copyfunc_type(copyfunc), ossl_check_X509_TRUST_freefunc_type(freefunc))) +#define sk_X509_TRUST_set_cmp_func(sk, cmp) ((sk_X509_TRUST_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_compfunc_type(cmp))) + + +/* standard trust ids */ +# define X509_TRUST_DEFAULT 0 /* Only valid in purpose settings */ +# define X509_TRUST_COMPAT 1 +# define X509_TRUST_SSL_CLIENT 2 +# define X509_TRUST_SSL_SERVER 3 +# define X509_TRUST_EMAIL 4 +# define X509_TRUST_OBJECT_SIGN 5 +# define X509_TRUST_OCSP_SIGN 6 +# define X509_TRUST_OCSP_REQUEST 7 +# define X509_TRUST_TSA 8 +/* Keep these up to date! 
*/ +# define X509_TRUST_MIN 1 +# define X509_TRUST_MAX 8 + +/* trust_flags values */ +# define X509_TRUST_DYNAMIC (1U << 0) +# define X509_TRUST_DYNAMIC_NAME (1U << 1) +/* No compat trust if self-signed, preempts "DO_SS" */ +# define X509_TRUST_NO_SS_COMPAT (1U << 2) +/* Compat trust if no explicit accepted trust EKUs */ +# define X509_TRUST_DO_SS_COMPAT (1U << 3) +/* Accept "anyEKU" as a wildcard rejection OID and as a wildcard trust OID */ +# define X509_TRUST_OK_ANY_EKU (1U << 4) + +/* check_trust return codes */ +# define X509_TRUST_TRUSTED 1 +# define X509_TRUST_REJECTED 2 +# define X509_TRUST_UNTRUSTED 3 + +int X509_TRUST_set(int *t, int trust); +int X509_TRUST_get_count(void); +X509_TRUST *X509_TRUST_get0(int idx); +int X509_TRUST_get_by_id(int id); +int X509_TRUST_add(int id, int flags, int (*ck) (X509_TRUST *, X509 *, int), + const char *name, int arg1, void *arg2); +void X509_TRUST_cleanup(void); +int X509_TRUST_get_flags(const X509_TRUST *xp); +char *X509_TRUST_get0_name(const X509_TRUST *xp); +int X509_TRUST_get_trust(const X509_TRUST *xp); + +int X509_trusted(const X509 *x); +int X509_add1_trust_object(X509 *x, const ASN1_OBJECT *obj); +int X509_add1_reject_object(X509 *x, const ASN1_OBJECT *obj); +void X509_trust_clear(X509 *x); +void X509_reject_clear(X509 *x); +STACK_OF(ASN1_OBJECT) *X509_get0_trust_objects(X509 *x); +STACK_OF(ASN1_OBJECT) *X509_get0_reject_objects(X509 *x); + +int (*X509_TRUST_set_default(int (*trust) (int, X509 *, int))) (int, X509 *, + int); +int X509_check_trust(X509 *x, int id, int flags); + +int X509_verify_cert(X509_STORE_CTX *ctx); +int X509_STORE_CTX_verify(X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_build_chain(X509 *target, STACK_OF(X509) *certs, + X509_STORE *store, int with_self_signed, + OSSL_LIB_CTX *libctx, const char *propq); + +int X509_STORE_set_depth(X509_STORE *store, int depth); + +typedef int (*X509_STORE_CTX_verify_cb)(int, X509_STORE_CTX *); +int X509_STORE_CTX_print_verify_cb(int ok, X509_STORE_CTX *ctx); +typedef int (*X509_STORE_CTX_verify_fn)(X509_STORE_CTX *); +typedef int (*X509_STORE_CTX_get_issuer_fn)(X509 **issuer, + X509_STORE_CTX *ctx, X509 *x); +typedef int (*X509_STORE_CTX_check_issued_fn)(X509_STORE_CTX *ctx, + X509 *x, X509 *issuer); +typedef int (*X509_STORE_CTX_check_revocation_fn)(X509_STORE_CTX *ctx); +typedef int (*X509_STORE_CTX_get_crl_fn)(X509_STORE_CTX *ctx, + X509_CRL **crl, X509 *x); +typedef int (*X509_STORE_CTX_check_crl_fn)(X509_STORE_CTX *ctx, X509_CRL *crl); +typedef int (*X509_STORE_CTX_cert_crl_fn)(X509_STORE_CTX *ctx, + X509_CRL *crl, X509 *x); +typedef int (*X509_STORE_CTX_check_policy_fn)(X509_STORE_CTX *ctx); +typedef STACK_OF(X509) + *(*X509_STORE_CTX_lookup_certs_fn)(X509_STORE_CTX *ctx, + const X509_NAME *nm); +typedef STACK_OF(X509_CRL) + *(*X509_STORE_CTX_lookup_crls_fn)(const X509_STORE_CTX *ctx, + const X509_NAME *nm); +typedef int (*X509_STORE_CTX_cleanup_fn)(X509_STORE_CTX *ctx); + +void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); + +# define X509_STORE_CTX_set_app_data(ctx,data) \ + X509_STORE_CTX_set_ex_data(ctx,0,data) +# define X509_STORE_CTX_get_app_data(ctx) \ + X509_STORE_CTX_get_ex_data(ctx,0) + +# define X509_L_FILE_LOAD 1 +# define X509_L_ADD_DIR 2 +# define X509_L_ADD_STORE 3 +# define X509_L_LOAD_STORE 4 + +# define X509_LOOKUP_load_file(x,name,type) \ + X509_LOOKUP_ctrl((x),X509_L_FILE_LOAD,(name),(long)(type),NULL) + +# define X509_LOOKUP_add_dir(x,name,type) \ + X509_LOOKUP_ctrl((x),X509_L_ADD_DIR,(name),(long)(type),NULL) + +# define 
X509_LOOKUP_add_store(x,name) \ + X509_LOOKUP_ctrl((x),X509_L_ADD_STORE,(name),0,NULL) + +# define X509_LOOKUP_load_store(x,name) \ + X509_LOOKUP_ctrl((x),X509_L_LOAD_STORE,(name),0,NULL) + +# define X509_LOOKUP_load_file_ex(x, name, type, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_FILE_LOAD, (name), (long)(type), NULL,\ + (libctx), (propq)) + +# define X509_LOOKUP_load_store_ex(x, name, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_LOAD_STORE, (name), 0, NULL, \ + (libctx), (propq)) + +# define X509_LOOKUP_add_store_ex(x, name, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_ADD_STORE, (name), 0, NULL, \ + (libctx), (propq)) + +# define X509_V_OK 0 +# define X509_V_ERR_UNSPECIFIED 1 +# define X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT 2 +# define X509_V_ERR_UNABLE_TO_GET_CRL 3 +# define X509_V_ERR_UNABLE_TO_DECRYPT_CERT_SIGNATURE 4 +# define X509_V_ERR_UNABLE_TO_DECRYPT_CRL_SIGNATURE 5 +# define X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY 6 +# define X509_V_ERR_CERT_SIGNATURE_FAILURE 7 +# define X509_V_ERR_CRL_SIGNATURE_FAILURE 8 +# define X509_V_ERR_CERT_NOT_YET_VALID 9 +# define X509_V_ERR_CERT_HAS_EXPIRED 10 +# define X509_V_ERR_CRL_NOT_YET_VALID 11 +# define X509_V_ERR_CRL_HAS_EXPIRED 12 +# define X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD 13 +# define X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD 14 +# define X509_V_ERR_ERROR_IN_CRL_LAST_UPDATE_FIELD 15 +# define X509_V_ERR_ERROR_IN_CRL_NEXT_UPDATE_FIELD 16 +# define X509_V_ERR_OUT_OF_MEM 17 +# define X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT 18 +# define X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN 19 +# define X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY 20 +# define X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE 21 +# define X509_V_ERR_CERT_CHAIN_TOO_LONG 22 +# define X509_V_ERR_CERT_REVOKED 23 +# define X509_V_ERR_NO_ISSUER_PUBLIC_KEY 24 +# define X509_V_ERR_PATH_LENGTH_EXCEEDED 25 +# define X509_V_ERR_INVALID_PURPOSE 26 +# define X509_V_ERR_CERT_UNTRUSTED 27 +# define X509_V_ERR_CERT_REJECTED 28 + +/* These are 'informational' when looking for issuer cert */ +# define X509_V_ERR_SUBJECT_ISSUER_MISMATCH 29 +# define X509_V_ERR_AKID_SKID_MISMATCH 30 +# define X509_V_ERR_AKID_ISSUER_SERIAL_MISMATCH 31 +# define X509_V_ERR_KEYUSAGE_NO_CERTSIGN 32 +# define X509_V_ERR_UNABLE_TO_GET_CRL_ISSUER 33 +# define X509_V_ERR_UNHANDLED_CRITICAL_EXTENSION 34 +# define X509_V_ERR_KEYUSAGE_NO_CRL_SIGN 35 +# define X509_V_ERR_UNHANDLED_CRITICAL_CRL_EXTENSION 36 +# define X509_V_ERR_INVALID_NON_CA 37 +# define X509_V_ERR_PROXY_PATH_LENGTH_EXCEEDED 38 +# define X509_V_ERR_KEYUSAGE_NO_DIGITAL_SIGNATURE 39 +# define X509_V_ERR_PROXY_CERTIFICATES_NOT_ALLOWED 40 +# define X509_V_ERR_INVALID_EXTENSION 41 +# define X509_V_ERR_INVALID_POLICY_EXTENSION 42 +# define X509_V_ERR_NO_EXPLICIT_POLICY 43 +# define X509_V_ERR_DIFFERENT_CRL_SCOPE 44 +# define X509_V_ERR_UNSUPPORTED_EXTENSION_FEATURE 45 +# define X509_V_ERR_UNNESTED_RESOURCE 46 +# define X509_V_ERR_PERMITTED_VIOLATION 47 +# define X509_V_ERR_EXCLUDED_VIOLATION 48 +# define X509_V_ERR_SUBTREE_MINMAX 49 +/* The application is not happy */ +# define X509_V_ERR_APPLICATION_VERIFICATION 50 +# define X509_V_ERR_UNSUPPORTED_CONSTRAINT_TYPE 51 +# define X509_V_ERR_UNSUPPORTED_CONSTRAINT_SYNTAX 52 +# define X509_V_ERR_UNSUPPORTED_NAME_SYNTAX 53 +# define X509_V_ERR_CRL_PATH_VALIDATION_ERROR 54 +/* Another issuer check debug option */ +# define X509_V_ERR_PATH_LOOP 55 +/* Suite B mode algorithm violation */ +# define X509_V_ERR_SUITE_B_INVALID_VERSION 56 +# define X509_V_ERR_SUITE_B_INVALID_ALGORITHM 57 +# define 
X509_V_ERR_SUITE_B_INVALID_CURVE 58 +# define X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM 59 +# define X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED 60 +# define X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256 61 +/* Host, email and IP check errors */ +# define X509_V_ERR_HOSTNAME_MISMATCH 62 +# define X509_V_ERR_EMAIL_MISMATCH 63 +# define X509_V_ERR_IP_ADDRESS_MISMATCH 64 +/* DANE TLSA errors */ +# define X509_V_ERR_DANE_NO_MATCH 65 +/* security level errors */ +# define X509_V_ERR_EE_KEY_TOO_SMALL 66 +# define X509_V_ERR_CA_KEY_TOO_SMALL 67 +# define X509_V_ERR_CA_MD_TOO_WEAK 68 +/* Caller error */ +# define X509_V_ERR_INVALID_CALL 69 +/* Issuer lookup error */ +# define X509_V_ERR_STORE_LOOKUP 70 +/* Certificate transparency */ +# define X509_V_ERR_NO_VALID_SCTS 71 + +# define X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION 72 +/* OCSP status errors */ +# define X509_V_ERR_OCSP_VERIFY_NEEDED 73 /* Need OCSP verification */ +# define X509_V_ERR_OCSP_VERIFY_FAILED 74 /* Couldn't verify cert through OCSP */ +# define X509_V_ERR_OCSP_CERT_UNKNOWN 75 /* Certificate wasn't recognized by the OCSP responder */ + +# define X509_V_ERR_UNSUPPORTED_SIGNATURE_ALGORITHM 76 +# define X509_V_ERR_SIGNATURE_ALGORITHM_MISMATCH 77 + +/* Errors in case a check in X509_V_FLAG_X509_STRICT mode fails */ +# define X509_V_ERR_SIGNATURE_ALGORITHM_INCONSISTENCY 78 +# define X509_V_ERR_INVALID_CA 79 +# define X509_V_ERR_PATHLEN_INVALID_FOR_NON_CA 80 +# define X509_V_ERR_PATHLEN_WITHOUT_KU_KEY_CERT_SIGN 81 +# define X509_V_ERR_KU_KEY_CERT_SIGN_INVALID_FOR_NON_CA 82 +# define X509_V_ERR_ISSUER_NAME_EMPTY 83 +# define X509_V_ERR_SUBJECT_NAME_EMPTY 84 +# define X509_V_ERR_MISSING_AUTHORITY_KEY_IDENTIFIER 85 +# define X509_V_ERR_MISSING_SUBJECT_KEY_IDENTIFIER 86 +# define X509_V_ERR_EMPTY_SUBJECT_ALT_NAME 87 +# define X509_V_ERR_EMPTY_SUBJECT_SAN_NOT_CRITICAL 88 +# define X509_V_ERR_CA_BCONS_NOT_CRITICAL 89 +# define X509_V_ERR_AUTHORITY_KEY_IDENTIFIER_CRITICAL 90 +# define X509_V_ERR_SUBJECT_KEY_IDENTIFIER_CRITICAL 91 +# define X509_V_ERR_CA_CERT_MISSING_KEY_USAGE 92 +# define X509_V_ERR_EXTENSIONS_REQUIRE_VERSION_3 93 +# define X509_V_ERR_EC_KEY_EXPLICIT_PARAMS 94 +# define X509_V_ERR_RPK_UNTRUSTED 95 + +/* Certificate verify flags */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_V_FLAG_CB_ISSUER_CHECK 0x0 /* Deprecated */ +# endif +/* Use check time instead of current time */ +# define X509_V_FLAG_USE_CHECK_TIME 0x2 +/* Lookup CRLs */ +# define X509_V_FLAG_CRL_CHECK 0x4 +/* Lookup CRLs for whole chain */ +# define X509_V_FLAG_CRL_CHECK_ALL 0x8 +/* Ignore unhandled critical extensions */ +# define X509_V_FLAG_IGNORE_CRITICAL 0x10 +/* Disable workarounds for broken certificates */ +# define X509_V_FLAG_X509_STRICT 0x20 +/* Enable proxy certificate validation */ +# define X509_V_FLAG_ALLOW_PROXY_CERTS 0x40 +/* Enable policy checking */ +# define X509_V_FLAG_POLICY_CHECK 0x80 +/* Policy variable require-explicit-policy */ +# define X509_V_FLAG_EXPLICIT_POLICY 0x100 +/* Policy variable inhibit-any-policy */ +# define X509_V_FLAG_INHIBIT_ANY 0x200 +/* Policy variable inhibit-policy-mapping */ +# define X509_V_FLAG_INHIBIT_MAP 0x400 +/* Notify callback that policy is OK */ +# define X509_V_FLAG_NOTIFY_POLICY 0x800 +/* Extended CRL features such as indirect CRLs, alternate CRL signing keys */ +# define X509_V_FLAG_EXTENDED_CRL_SUPPORT 0x1000 +/* Delta CRL support */ +# define X509_V_FLAG_USE_DELTAS 0x2000 +/* Check self-signed CA signature */ +# define X509_V_FLAG_CHECK_SS_SIGNATURE 0x4000 +/* Use trusted store first */ +# define 
X509_V_FLAG_TRUSTED_FIRST 0x8000 +/* Suite B 128 bit only mode: not normally used */ +# define X509_V_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define X509_V_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define X509_V_FLAG_SUITEB_128_LOS 0x30000 +/* Allow partial chains if at least one certificate is in trusted store */ +# define X509_V_FLAG_PARTIAL_CHAIN 0x80000 +/* + * If the initial chain is not trusted, do not attempt to build an alternative + * chain. Alternate chain checking was introduced in 1.1.0. Setting this flag + * will force the behaviour to match that of previous versions. + */ +# define X509_V_FLAG_NO_ALT_CHAINS 0x100000 +/* Do not check certificate/CRL validity against current time */ +# define X509_V_FLAG_NO_CHECK_TIME 0x200000 + +# define X509_VP_FLAG_DEFAULT 0x1 +# define X509_VP_FLAG_OVERWRITE 0x2 +# define X509_VP_FLAG_RESET_FLAGS 0x4 +# define X509_VP_FLAG_LOCKED 0x8 +# define X509_VP_FLAG_ONCE 0x10 + +/* Internal use: mask of policy related options */ +# define X509_V_FLAG_POLICY_MASK (X509_V_FLAG_POLICY_CHECK \ + | X509_V_FLAG_EXPLICIT_POLICY \ + | X509_V_FLAG_INHIBIT_ANY \ + | X509_V_FLAG_INHIBIT_MAP) + +int X509_OBJECT_idx_by_subject(STACK_OF(X509_OBJECT) *h, X509_LOOKUP_TYPE type, + const X509_NAME *name); +X509_OBJECT *X509_OBJECT_retrieve_by_subject(STACK_OF(X509_OBJECT) *h, + X509_LOOKUP_TYPE type, + const X509_NAME *name); +X509_OBJECT *X509_OBJECT_retrieve_match(STACK_OF(X509_OBJECT) *h, + X509_OBJECT *x); +int X509_OBJECT_up_ref_count(X509_OBJECT *a); +X509_OBJECT *X509_OBJECT_new(void); +void X509_OBJECT_free(X509_OBJECT *a); +X509_LOOKUP_TYPE X509_OBJECT_get_type(const X509_OBJECT *a); +X509 *X509_OBJECT_get0_X509(const X509_OBJECT *a); +int X509_OBJECT_set1_X509(X509_OBJECT *a, X509 *obj); +X509_CRL *X509_OBJECT_get0_X509_CRL(const X509_OBJECT *a); +int X509_OBJECT_set1_X509_CRL(X509_OBJECT *a, X509_CRL *obj); +X509_STORE *X509_STORE_new(void); +void X509_STORE_free(X509_STORE *xs); +int X509_STORE_lock(X509_STORE *xs); +int X509_STORE_unlock(X509_STORE *xs); +int X509_STORE_up_ref(X509_STORE *xs); +STACK_OF(X509_OBJECT) *X509_STORE_get0_objects(const X509_STORE *xs); +STACK_OF(X509) *X509_STORE_get1_all_certs(X509_STORE *xs); +STACK_OF(X509) *X509_STORE_CTX_get1_certs(X509_STORE_CTX *xs, + const X509_NAME *nm); +STACK_OF(X509_CRL) *X509_STORE_CTX_get1_crls(const X509_STORE_CTX *st, + const X509_NAME *nm); +int X509_STORE_set_flags(X509_STORE *xs, unsigned long flags); +int X509_STORE_set_purpose(X509_STORE *xs, int purpose); +int X509_STORE_set_trust(X509_STORE *xs, int trust); +int X509_STORE_set1_param(X509_STORE *xs, const X509_VERIFY_PARAM *pm); +X509_VERIFY_PARAM *X509_STORE_get0_param(const X509_STORE *xs); + +void X509_STORE_set_verify(X509_STORE *xs, X509_STORE_CTX_verify_fn verify); +#define X509_STORE_set_verify_func(ctx, func) \ + X509_STORE_set_verify((ctx),(func)) +void X509_STORE_CTX_set_verify(X509_STORE_CTX *ctx, + X509_STORE_CTX_verify_fn verify); +X509_STORE_CTX_verify_fn X509_STORE_get_verify(const X509_STORE *xs); +void X509_STORE_set_verify_cb(X509_STORE *xs, + X509_STORE_CTX_verify_cb verify_cb); +# define X509_STORE_set_verify_cb_func(ctx,func) \ + X509_STORE_set_verify_cb((ctx),(func)) +X509_STORE_CTX_verify_cb X509_STORE_get_verify_cb(const X509_STORE *xs); +void X509_STORE_set_get_issuer(X509_STORE *xs, + X509_STORE_CTX_get_issuer_fn get_issuer); +X509_STORE_CTX_get_issuer_fn X509_STORE_get_get_issuer(const X509_STORE *xs); +void 
X509_STORE_set_check_issued(X509_STORE *xs, + X509_STORE_CTX_check_issued_fn check_issued); +X509_STORE_CTX_check_issued_fn X509_STORE_get_check_issued(const X509_STORE *s); +void X509_STORE_set_check_revocation(X509_STORE *xs, + X509_STORE_CTX_check_revocation_fn check_revocation); +X509_STORE_CTX_check_revocation_fn + X509_STORE_get_check_revocation(const X509_STORE *xs); +void X509_STORE_set_get_crl(X509_STORE *xs, + X509_STORE_CTX_get_crl_fn get_crl); +X509_STORE_CTX_get_crl_fn X509_STORE_get_get_crl(const X509_STORE *xs); +void X509_STORE_set_check_crl(X509_STORE *xs, + X509_STORE_CTX_check_crl_fn check_crl); +X509_STORE_CTX_check_crl_fn X509_STORE_get_check_crl(const X509_STORE *xs); +void X509_STORE_set_cert_crl(X509_STORE *xs, + X509_STORE_CTX_cert_crl_fn cert_crl); +X509_STORE_CTX_cert_crl_fn X509_STORE_get_cert_crl(const X509_STORE *xs); +void X509_STORE_set_check_policy(X509_STORE *xs, + X509_STORE_CTX_check_policy_fn check_policy); +X509_STORE_CTX_check_policy_fn X509_STORE_get_check_policy(const X509_STORE *s); +void X509_STORE_set_lookup_certs(X509_STORE *xs, + X509_STORE_CTX_lookup_certs_fn lookup_certs); +X509_STORE_CTX_lookup_certs_fn X509_STORE_get_lookup_certs(const X509_STORE *s); +void X509_STORE_set_lookup_crls(X509_STORE *xs, + X509_STORE_CTX_lookup_crls_fn lookup_crls); +#define X509_STORE_set_lookup_crls_cb(ctx, func) \ + X509_STORE_set_lookup_crls((ctx), (func)) +X509_STORE_CTX_lookup_crls_fn X509_STORE_get_lookup_crls(const X509_STORE *xs); +void X509_STORE_set_cleanup(X509_STORE *xs, + X509_STORE_CTX_cleanup_fn cleanup); +X509_STORE_CTX_cleanup_fn X509_STORE_get_cleanup(const X509_STORE *xs); + +#define X509_STORE_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509_STORE, l, p, newf, dupf, freef) +int X509_STORE_set_ex_data(X509_STORE *xs, int idx, void *data); +void *X509_STORE_get_ex_data(const X509_STORE *xs, int idx); + +X509_STORE_CTX *X509_STORE_CTX_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +X509_STORE_CTX *X509_STORE_CTX_new(void); + +int X509_STORE_CTX_get1_issuer(X509 **issuer, X509_STORE_CTX *ctx, X509 *x); + +void X509_STORE_CTX_free(X509_STORE_CTX *ctx); +int X509_STORE_CTX_init(X509_STORE_CTX *ctx, X509_STORE *trust_store, + X509 *target, STACK_OF(X509) *untrusted); +int X509_STORE_CTX_init_rpk(X509_STORE_CTX *ctx, X509_STORE *trust_store, + EVP_PKEY* rpk); +void X509_STORE_CTX_set0_trusted_stack(X509_STORE_CTX *ctx, STACK_OF(X509) *sk); +void X509_STORE_CTX_cleanup(X509_STORE_CTX *ctx); + +X509_STORE *X509_STORE_CTX_get0_store(const X509_STORE_CTX *ctx); +X509 *X509_STORE_CTX_get0_cert(const X509_STORE_CTX *ctx); +EVP_PKEY *X509_STORE_CTX_get0_rpk(const X509_STORE_CTX *ctx); +STACK_OF(X509)* X509_STORE_CTX_get0_untrusted(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set0_untrusted(X509_STORE_CTX *ctx, STACK_OF(X509) *sk); +void X509_STORE_CTX_set_verify_cb(X509_STORE_CTX *ctx, + X509_STORE_CTX_verify_cb verify); +X509_STORE_CTX_verify_cb X509_STORE_CTX_get_verify_cb(const X509_STORE_CTX *ctx); +X509_STORE_CTX_verify_fn X509_STORE_CTX_get_verify(const X509_STORE_CTX *ctx); +X509_STORE_CTX_get_issuer_fn X509_STORE_CTX_get_get_issuer(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_issued_fn X509_STORE_CTX_get_check_issued(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_revocation_fn X509_STORE_CTX_get_check_revocation(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_get_crl(X509_STORE_CTX *ctx, + X509_STORE_CTX_get_crl_fn get_crl); +X509_STORE_CTX_get_crl_fn X509_STORE_CTX_get_get_crl(const 
X509_STORE_CTX *ctx); +X509_STORE_CTX_check_crl_fn X509_STORE_CTX_get_check_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX_cert_crl_fn X509_STORE_CTX_get_cert_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_policy_fn X509_STORE_CTX_get_check_policy(const X509_STORE_CTX *ctx); +X509_STORE_CTX_lookup_certs_fn X509_STORE_CTX_get_lookup_certs(const X509_STORE_CTX *ctx); +X509_STORE_CTX_lookup_crls_fn X509_STORE_CTX_get_lookup_crls(const X509_STORE_CTX *ctx); +X509_STORE_CTX_cleanup_fn X509_STORE_CTX_get_cleanup(const X509_STORE_CTX *ctx); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_STORE_CTX_get_chain X509_STORE_CTX_get0_chain +# define X509_STORE_CTX_set_chain X509_STORE_CTX_set0_untrusted +# define X509_STORE_CTX_trusted_stack X509_STORE_CTX_set0_trusted_stack +# define X509_STORE_get_by_subject X509_STORE_CTX_get_by_subject +# define X509_STORE_get1_certs X509_STORE_CTX_get1_certs +# define X509_STORE_get1_crls X509_STORE_CTX_get1_crls +/* the following macro is misspelled; use X509_STORE_get1_certs instead */ +# define X509_STORE_get1_cert X509_STORE_CTX_get1_certs +/* the following macro is misspelled; use X509_STORE_get1_crls instead */ +# define X509_STORE_get1_crl X509_STORE_CTX_get1_crls +#endif + +X509_LOOKUP *X509_STORE_add_lookup(X509_STORE *xs, X509_LOOKUP_METHOD *m); +X509_LOOKUP_METHOD *X509_LOOKUP_hash_dir(void); +X509_LOOKUP_METHOD *X509_LOOKUP_file(void); +X509_LOOKUP_METHOD *X509_LOOKUP_store(void); + +typedef int (*X509_LOOKUP_ctrl_fn)(X509_LOOKUP *ctx, int cmd, const char *argc, + long argl, char **ret); +typedef int (*X509_LOOKUP_ctrl_ex_fn)( + X509_LOOKUP *ctx, int cmd, const char *argc, long argl, char **ret, + OSSL_LIB_CTX *libctx, const char *propq); + +typedef int (*X509_LOOKUP_get_by_subject_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_subject_ex_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + X509_OBJECT *ret, + OSSL_LIB_CTX *libctx, + const char *propq); +typedef int (*X509_LOOKUP_get_by_issuer_serial_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + const ASN1_INTEGER *serial, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_fingerprint_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const unsigned char* bytes, + int len, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_alias_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const char *str, + int len, + X509_OBJECT *ret); + +X509_LOOKUP_METHOD *X509_LOOKUP_meth_new(const char *name); +void X509_LOOKUP_meth_free(X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_new_item(X509_LOOKUP_METHOD *method, + int (*new_item) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_new_item(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_free(X509_LOOKUP_METHOD *method, + void (*free_fn) (X509_LOOKUP *ctx)); +void (*X509_LOOKUP_meth_get_free(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_init(X509_LOOKUP_METHOD *method, + int (*init) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_init(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_shutdown(X509_LOOKUP_METHOD *method, + int (*shutdown) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_shutdown(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_ctrl(X509_LOOKUP_METHOD *method, + X509_LOOKUP_ctrl_fn ctrl_fn); +X509_LOOKUP_ctrl_fn X509_LOOKUP_meth_get_ctrl(const 
X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_subject(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_subject_fn fn); +X509_LOOKUP_get_by_subject_fn X509_LOOKUP_meth_get_get_by_subject( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_issuer_serial(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_issuer_serial_fn fn); +X509_LOOKUP_get_by_issuer_serial_fn X509_LOOKUP_meth_get_get_by_issuer_serial( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_fingerprint(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_fingerprint_fn fn); +X509_LOOKUP_get_by_fingerprint_fn X509_LOOKUP_meth_get_get_by_fingerprint( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_alias(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_alias_fn fn); +X509_LOOKUP_get_by_alias_fn X509_LOOKUP_meth_get_get_by_alias( + const X509_LOOKUP_METHOD *method); + + +int X509_STORE_add_cert(X509_STORE *xs, X509 *x); +int X509_STORE_add_crl(X509_STORE *xs, X509_CRL *x); + +int X509_STORE_CTX_get_by_subject(const X509_STORE_CTX *vs, + X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret); +X509_OBJECT *X509_STORE_CTX_get_obj_by_subject(X509_STORE_CTX *vs, + X509_LOOKUP_TYPE type, + const X509_NAME *name); + +int X509_LOOKUP_ctrl(X509_LOOKUP *ctx, int cmd, const char *argc, + long argl, char **ret); +int X509_LOOKUP_ctrl_ex(X509_LOOKUP *ctx, int cmd, const char *argc, long argl, + char **ret, OSSL_LIB_CTX *libctx, const char *propq); + +int X509_load_cert_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_file_ex(X509_LOOKUP *ctx, const char *file, int type, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_load_crl_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_crl_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_crl_file_ex(X509_LOOKUP *ctx, const char *file, int type, + OSSL_LIB_CTX *libctx, const char *propq); + +X509_LOOKUP *X509_LOOKUP_new(X509_LOOKUP_METHOD *method); +void X509_LOOKUP_free(X509_LOOKUP *ctx); +int X509_LOOKUP_init(X509_LOOKUP *ctx); +int X509_LOOKUP_by_subject(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret); +int X509_LOOKUP_by_subject_ex(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_LOOKUP_by_issuer_serial(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, + const ASN1_INTEGER *serial, + X509_OBJECT *ret); +int X509_LOOKUP_by_fingerprint(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const unsigned char *bytes, int len, + X509_OBJECT *ret); +int X509_LOOKUP_by_alias(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const char *str, int len, X509_OBJECT *ret); +int X509_LOOKUP_set_method_data(X509_LOOKUP *ctx, void *data); +void *X509_LOOKUP_get_method_data(const X509_LOOKUP *ctx); +X509_STORE *X509_LOOKUP_get_store(const X509_LOOKUP *ctx); +int X509_LOOKUP_shutdown(X509_LOOKUP *ctx); + +int X509_STORE_load_file(X509_STORE *xs, const char *file); +int X509_STORE_load_path(X509_STORE *xs, const char *path); +int X509_STORE_load_store(X509_STORE *xs, const char *store); +int X509_STORE_load_locations(X509_STORE *s, const char *file, const char *dir); +int X509_STORE_set_default_paths(X509_STORE *xs); + +int X509_STORE_load_file_ex(X509_STORE *xs, const char *file, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_STORE_load_store_ex(X509_STORE *xs, const char *store, + OSSL_LIB_CTX *libctx, const char *propq); +int 
X509_STORE_load_locations_ex(X509_STORE *xs, + const char *file, const char *dir, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_STORE_set_default_paths_ex(X509_STORE *xs, + OSSL_LIB_CTX *libctx, const char *propq); + +#define X509_STORE_CTX_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509_STORE_CTX, l, p, newf, dupf, freef) +int X509_STORE_CTX_set_ex_data(X509_STORE_CTX *ctx, int idx, void *data); +void *X509_STORE_CTX_get_ex_data(const X509_STORE_CTX *ctx, int idx); +int X509_STORE_CTX_get_error(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_error(X509_STORE_CTX *ctx, int s); +int X509_STORE_CTX_get_error_depth(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_error_depth(X509_STORE_CTX *ctx, int depth); +X509 *X509_STORE_CTX_get_current_cert(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_current_cert(X509_STORE_CTX *ctx, X509 *x); +X509 *X509_STORE_CTX_get0_current_issuer(const X509_STORE_CTX *ctx); +X509_CRL *X509_STORE_CTX_get0_current_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX *X509_STORE_CTX_get0_parent_ctx(const X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_STORE_CTX_get0_chain(const X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_STORE_CTX_get1_chain(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_cert(X509_STORE_CTX *ctx, X509 *target); +void X509_STORE_CTX_set0_rpk(X509_STORE_CTX *ctx, EVP_PKEY *target); +void X509_STORE_CTX_set0_verified_chain(X509_STORE_CTX *c, STACK_OF(X509) *sk); +void X509_STORE_CTX_set0_crls(X509_STORE_CTX *ctx, STACK_OF(X509_CRL) *sk); +int X509_STORE_CTX_set_purpose(X509_STORE_CTX *ctx, int purpose); +int X509_STORE_CTX_set_trust(X509_STORE_CTX *ctx, int trust); +int X509_STORE_CTX_purpose_inherit(X509_STORE_CTX *ctx, int def_purpose, + int purpose, int trust); +void X509_STORE_CTX_set_flags(X509_STORE_CTX *ctx, unsigned long flags); +void X509_STORE_CTX_set_time(X509_STORE_CTX *ctx, unsigned long flags, + time_t t); +void X509_STORE_CTX_set_current_reasons(X509_STORE_CTX *ctx, + unsigned int current_reasons); + +X509_POLICY_TREE *X509_STORE_CTX_get0_policy_tree(const X509_STORE_CTX *ctx); +int X509_STORE_CTX_get_explicit_policy(const X509_STORE_CTX *ctx); +int X509_STORE_CTX_get_num_untrusted(const X509_STORE_CTX *ctx); + +X509_VERIFY_PARAM *X509_STORE_CTX_get0_param(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set0_param(X509_STORE_CTX *ctx, X509_VERIFY_PARAM *param); +int X509_STORE_CTX_set_default(X509_STORE_CTX *ctx, const char *name); + +/* + * Bridge opacity barrier between libcrypt and libssl, also needed to support + * offline testing in test/danetest.c + */ +void X509_STORE_CTX_set0_dane(X509_STORE_CTX *ctx, SSL_DANE *dane); +#define DANE_FLAG_NO_DANE_EE_NAMECHECKS (1L << 0) + +/* X509_VERIFY_PARAM functions */ + +X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void); +void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *to, + const X509_VERIFY_PARAM *from); +int X509_VERIFY_PARAM_set1(X509_VERIFY_PARAM *to, + const X509_VERIFY_PARAM *from); +int X509_VERIFY_PARAM_set1_name(X509_VERIFY_PARAM *param, const char *name); +int X509_VERIFY_PARAM_set_flags(X509_VERIFY_PARAM *param, + unsigned long flags); +int X509_VERIFY_PARAM_clear_flags(X509_VERIFY_PARAM *param, + unsigned long flags); +unsigned long X509_VERIFY_PARAM_get_flags(const X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set_purpose(X509_VERIFY_PARAM *param, int purpose); +int X509_VERIFY_PARAM_set_trust(X509_VERIFY_PARAM *param, int trust); +void 
X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth); +void X509_VERIFY_PARAM_set_auth_level(X509_VERIFY_PARAM *param, int auth_level); +time_t X509_VERIFY_PARAM_get_time(const X509_VERIFY_PARAM *param); +void X509_VERIFY_PARAM_set_time(X509_VERIFY_PARAM *param, time_t t); +int X509_VERIFY_PARAM_add0_policy(X509_VERIFY_PARAM *param, + ASN1_OBJECT *policy); +int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param, + STACK_OF(ASN1_OBJECT) *policies); + +int X509_VERIFY_PARAM_set_inh_flags(X509_VERIFY_PARAM *param, + uint32_t flags); +uint32_t X509_VERIFY_PARAM_get_inh_flags(const X509_VERIFY_PARAM *param); + +char *X509_VERIFY_PARAM_get0_host(X509_VERIFY_PARAM *param, int idx); +int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param, + unsigned int flags); +unsigned int X509_VERIFY_PARAM_get_hostflags(const X509_VERIFY_PARAM *param); +char *X509_VERIFY_PARAM_get0_peername(const X509_VERIFY_PARAM *param); +void X509_VERIFY_PARAM_move_peername(X509_VERIFY_PARAM *, X509_VERIFY_PARAM *); +char *X509_VERIFY_PARAM_get0_email(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param, + const char *email, size_t emaillen); +char *X509_VERIFY_PARAM_get1_ip_asc(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param, + const unsigned char *ip, size_t iplen); +int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, + const char *ipasc); + +int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_get_auth_level(const X509_VERIFY_PARAM *param); +const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param); + +int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_get_count(void); +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id); +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name); +void X509_VERIFY_PARAM_table_cleanup(void); + +/* Non positive return values are errors */ +#define X509_PCY_TREE_FAILURE -2 /* Failure to satisfy explicit policy */ +#define X509_PCY_TREE_INVALID -1 /* Inconsistent or invalid extensions */ +#define X509_PCY_TREE_INTERNAL 0 /* Internal error, most likely malloc */ + +/* + * Positive return values form a bit mask, all but the first are internal to + * the library and don't appear in results from X509_policy_check(). 
+ */ +#define X509_PCY_TREE_VALID 1 /* The policy tree is valid */ +#define X509_PCY_TREE_EMPTY 2 /* The policy tree is empty */ +#define X509_PCY_TREE_EXPLICIT 4 /* Explicit policy required */ + +int X509_policy_check(X509_POLICY_TREE **ptree, int *pexplicit_policy, + STACK_OF(X509) *certs, + STACK_OF(ASN1_OBJECT) *policy_oids, unsigned int flags); + +void X509_policy_tree_free(X509_POLICY_TREE *tree); + +int X509_policy_tree_level_count(const X509_POLICY_TREE *tree); +X509_POLICY_LEVEL *X509_policy_tree_get0_level(const X509_POLICY_TREE *tree, + int i); + +STACK_OF(X509_POLICY_NODE) + *X509_policy_tree_get0_policies(const X509_POLICY_TREE *tree); + +STACK_OF(X509_POLICY_NODE) + *X509_policy_tree_get0_user_policies(const X509_POLICY_TREE *tree); + +int X509_policy_level_node_count(X509_POLICY_LEVEL *level); + +X509_POLICY_NODE *X509_policy_level_get0_node(const X509_POLICY_LEVEL *level, + int i); + +const ASN1_OBJECT *X509_policy_node_get0_policy(const X509_POLICY_NODE *node); + +STACK_OF(POLICYQUALINFO) + *X509_policy_node_get0_qualifiers(const X509_POLICY_NODE *node); +const X509_POLICY_NODE + *X509_policy_node_get0_parent(const X509_POLICY_NODE *node); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509v3.h new file mode 100644 index 00000000000..e64da7e0c5a --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509v3.h @@ -0,0 +1,1454 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509v3.h.in + * + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509V3_H +# define OPENSSL_X509V3_H +# pragma once + +# include <openssl/macros.h> +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509V3_H +# endif + +# include <openssl/bio.h> +# include <openssl/x509.h> +# include <openssl/conf.h> +# include <openssl/types.h> +# ifndef OPENSSL_NO_STDIO +# include <stdio.h> +# endif + +#ifdef __cplusplus extern "C" { +#endif + +/* Forward reference */ +struct v3_ext_method; +struct v3_ext_ctx; + +/* Useful typedefs */ + +typedef void *(*X509V3_EXT_NEW)(void); +typedef void (*X509V3_EXT_FREE) (void *); +typedef void *(*X509V3_EXT_D2I)(void *, const unsigned char **, long); +typedef int (*X509V3_EXT_I2D) (const void *, unsigned char **); +typedef STACK_OF(CONF_VALUE) * + (*X509V3_EXT_I2V) (const struct v3_ext_method *method, void *ext, + STACK_OF(CONF_VALUE) *extlist); +typedef void *(*X509V3_EXT_V2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, + STACK_OF(CONF_VALUE) *values); +typedef char *(*X509V3_EXT_I2S)(const struct v3_ext_method *method, + void *ext); +typedef void *(*X509V3_EXT_S2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, const char *str); +typedef int (*X509V3_EXT_I2R) (const struct v3_ext_method *method, void *ext, + BIO *out, int indent); +typedef void *(*X509V3_EXT_R2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, const char *str); + +/* V3 extension structure */ + +struct v3_ext_method { + int ext_nid; + int ext_flags; +/* If this is set the following four fields are ignored */ + ASN1_ITEM_EXP *it; +/* Old style ASN1 calls */ + X509V3_EXT_NEW ext_new; + X509V3_EXT_FREE ext_free; + X509V3_EXT_D2I d2i; + X509V3_EXT_I2D i2d; +/* The following pair is used for string extensions */ + X509V3_EXT_I2S i2s; + X509V3_EXT_S2I s2i; +/* The following pair is used for multi-valued extensions */ + X509V3_EXT_I2V i2v; + X509V3_EXT_V2I v2i; +/* The following are used for raw extensions */ + X509V3_EXT_I2R i2r; + X509V3_EXT_R2I r2i; + void *usr_data; /* Any extension specific data */ +}; + +typedef struct X509V3_CONF_METHOD_st { + char *(*get_string) (void *db, const char *section, const char *value); + STACK_OF(CONF_VALUE) *(*get_section) (void *db, const char *section); + void (*free_string) (void *db, char *string); + void (*free_section) (void *db, STACK_OF(CONF_VALUE) *section); +} X509V3_CONF_METHOD; + +/* Context specific info for producing X509 v3 extensions*/ +struct v3_ext_ctx { +# define X509V3_CTX_TEST 0x1 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define CTX_TEST X509V3_CTX_TEST +# endif +# define X509V3_CTX_REPLACE 0x2 + int flags; + X509 *issuer_cert; + X509 *subject_cert; + X509_REQ *subject_req; + X509_CRL *crl; + X509V3_CONF_METHOD *db_meth; + void *db; + EVP_PKEY *issuer_pkey; +/* Maybe more here */ +}; + +typedef struct v3_ext_method X509V3_EXT_METHOD; + +SKM_DEFINE_STACK_OF_INTERNAL(X509V3_EXT_METHOD, X509V3_EXT_METHOD, X509V3_EXT_METHOD) +#define sk_X509V3_EXT_METHOD_num(sk) OPENSSL_sk_num(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_value(sk, idx) ((X509V3_EXT_METHOD *)OPENSSL_sk_value(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk), (idx))) +#define sk_X509V3_EXT_METHOD_new(cmp) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_new(ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp))) +#define sk_X509V3_EXT_METHOD_new_null() ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_new_null()) +#define sk_X509V3_EXT_METHOD_new_reserve(cmp, n) ((STACK_OF(X509V3_EXT_METHOD) 
*)OPENSSL_sk_new_reserve(ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp), (n))) +#define sk_X509V3_EXT_METHOD_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (n)) +#define sk_X509V3_EXT_METHOD_free(sk) OPENSSL_sk_free(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_zero(sk) OPENSSL_sk_zero(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_delete(sk, i) ((X509V3_EXT_METHOD *)OPENSSL_sk_delete(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (i))) +#define sk_X509V3_EXT_METHOD_delete_ptr(sk, ptr) ((X509V3_EXT_METHOD *)OPENSSL_sk_delete_ptr(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr))) +#define sk_X509V3_EXT_METHOD_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_pop(sk) ((X509V3_EXT_METHOD *)OPENSSL_sk_pop(ossl_check_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_shift(sk) ((X509V3_EXT_METHOD *)OPENSSL_sk_shift(ossl_check_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509V3_EXT_METHOD_sk_type(sk),ossl_check_X509V3_EXT_METHOD_freefunc_type(freefunc)) +#define sk_X509V3_EXT_METHOD_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr), (idx)) +#define sk_X509V3_EXT_METHOD_set(sk, idx, ptr) ((X509V3_EXT_METHOD *)OPENSSL_sk_set(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (idx), ossl_check_X509V3_EXT_METHOD_type(ptr))) +#define sk_X509V3_EXT_METHOD_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr), pnum) +#define sk_X509V3_EXT_METHOD_sort(sk) OPENSSL_sk_sort(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_dup(sk) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_dup(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_deep_copy(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_copyfunc_type(copyfunc), ossl_check_X509V3_EXT_METHOD_freefunc_type(freefunc))) +#define sk_X509V3_EXT_METHOD_set_cmp_func(sk, cmp) ((sk_X509V3_EXT_METHOD_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp))) + + +/* ext_flags values */ +# define X509V3_EXT_DYNAMIC 0x1 +# define X509V3_EXT_CTX_DEP 0x2 +# define X509V3_EXT_MULTILINE 0x4 + +typedef BIT_STRING_BITNAME ENUMERATED_NAMES; + +typedef struct BASIC_CONSTRAINTS_st { + int ca; + ASN1_INTEGER *pathlen; +} BASIC_CONSTRAINTS; + +typedef struct PKEY_USAGE_PERIOD_st { + ASN1_GENERALIZEDTIME *notBefore; + ASN1_GENERALIZEDTIME *notAfter; +} PKEY_USAGE_PERIOD; + +typedef struct otherName_st { + ASN1_OBJECT *type_id; + ASN1_TYPE *value; +} OTHERNAME; + +typedef struct EDIPartyName_st { + ASN1_STRING 
*nameAssigner; + ASN1_STRING *partyName; +} EDIPARTYNAME; + +typedef struct GENERAL_NAME_st { +# define GEN_OTHERNAME 0 +# define GEN_EMAIL 1 +# define GEN_DNS 2 +# define GEN_X400 3 +# define GEN_DIRNAME 4 +# define GEN_EDIPARTY 5 +# define GEN_URI 6 +# define GEN_IPADD 7 +# define GEN_RID 8 + int type; + union { + char *ptr; + OTHERNAME *otherName; /* otherName */ + ASN1_IA5STRING *rfc822Name; + ASN1_IA5STRING *dNSName; + ASN1_STRING *x400Address; + X509_NAME *directoryName; + EDIPARTYNAME *ediPartyName; + ASN1_IA5STRING *uniformResourceIdentifier; + ASN1_OCTET_STRING *iPAddress; + ASN1_OBJECT *registeredID; + /* Old names */ + ASN1_OCTET_STRING *ip; /* iPAddress */ + X509_NAME *dirn; /* dirn */ + ASN1_IA5STRING *ia5; /* rfc822Name, dNSName, + * uniformResourceIdentifier */ + ASN1_OBJECT *rid; /* registeredID */ + ASN1_TYPE *other; /* x400Address */ + } d; +} GENERAL_NAME; + +typedef struct ACCESS_DESCRIPTION_st { + ASN1_OBJECT *method; + GENERAL_NAME *location; +} ACCESS_DESCRIPTION; + +SKM_DEFINE_STACK_OF_INTERNAL(ACCESS_DESCRIPTION, ACCESS_DESCRIPTION, ACCESS_DESCRIPTION) +#define sk_ACCESS_DESCRIPTION_num(sk) OPENSSL_sk_num(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_value(sk, idx) ((ACCESS_DESCRIPTION *)OPENSSL_sk_value(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk), (idx))) +#define sk_ACCESS_DESCRIPTION_new(cmp) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new(ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp))) +#define sk_ACCESS_DESCRIPTION_new_null() ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new_null()) +#define sk_ACCESS_DESCRIPTION_new_reserve(cmp, n) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new_reserve(ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp), (n))) +#define sk_ACCESS_DESCRIPTION_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (n)) +#define sk_ACCESS_DESCRIPTION_free(sk) OPENSSL_sk_free(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_zero(sk) OPENSSL_sk_zero(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_delete(sk, i) ((ACCESS_DESCRIPTION *)OPENSSL_sk_delete(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (i))) +#define sk_ACCESS_DESCRIPTION_delete_ptr(sk, ptr) ((ACCESS_DESCRIPTION *)OPENSSL_sk_delete_ptr(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr))) +#define sk_ACCESS_DESCRIPTION_push(sk, ptr) OPENSSL_sk_push(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_pop(sk) ((ACCESS_DESCRIPTION *)OPENSSL_sk_pop(ossl_check_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_shift(sk) ((ACCESS_DESCRIPTION *)OPENSSL_sk_shift(ossl_check_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ACCESS_DESCRIPTION_sk_type(sk),ossl_check_ACCESS_DESCRIPTION_freefunc_type(freefunc)) +#define sk_ACCESS_DESCRIPTION_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr), (idx)) +#define sk_ACCESS_DESCRIPTION_set(sk, idx, ptr) ((ACCESS_DESCRIPTION *)OPENSSL_sk_set(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (idx), ossl_check_ACCESS_DESCRIPTION_type(ptr))) +#define sk_ACCESS_DESCRIPTION_find(sk, ptr) OPENSSL_sk_find(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), 
ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr), pnum) +#define sk_ACCESS_DESCRIPTION_sort(sk) OPENSSL_sk_sort(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_dup(sk) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_dup(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_deep_copy(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_copyfunc_type(copyfunc), ossl_check_ACCESS_DESCRIPTION_freefunc_type(freefunc))) +#define sk_ACCESS_DESCRIPTION_set_cmp_func(sk, cmp) ((sk_ACCESS_DESCRIPTION_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_NAME, GENERAL_NAME, GENERAL_NAME) +#define sk_GENERAL_NAME_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_value(sk, idx) ((GENERAL_NAME *)OPENSSL_sk_value(ossl_check_const_GENERAL_NAME_sk_type(sk), (idx))) +#define sk_GENERAL_NAME_new(cmp) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new(ossl_check_GENERAL_NAME_compfunc_type(cmp))) +#define sk_GENERAL_NAME_new_null() ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_NAME_new_reserve(cmp, n) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_NAME_compfunc_type(cmp), (n))) +#define sk_GENERAL_NAME_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_NAME_sk_type(sk), (n)) +#define sk_GENERAL_NAME_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_delete(sk, i) ((GENERAL_NAME *)OPENSSL_sk_delete(ossl_check_GENERAL_NAME_sk_type(sk), (i))) +#define sk_GENERAL_NAME_delete_ptr(sk, ptr) ((GENERAL_NAME *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr))) +#define sk_GENERAL_NAME_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_pop(sk) ((GENERAL_NAME *)OPENSSL_sk_pop(ossl_check_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_shift(sk) ((GENERAL_NAME *)OPENSSL_sk_shift(ossl_check_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_NAME_sk_type(sk),ossl_check_GENERAL_NAME_freefunc_type(freefunc)) +#define sk_GENERAL_NAME_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr), (idx)) +#define sk_GENERAL_NAME_set(sk, idx, ptr) ((GENERAL_NAME *)OPENSSL_sk_set(ossl_check_GENERAL_NAME_sk_type(sk), (idx), ossl_check_GENERAL_NAME_type(ptr))) +#define sk_GENERAL_NAME_find(sk, ptr) OPENSSL_sk_find(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_NAME_sk_type(sk), 
ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr), pnum) +#define sk_GENERAL_NAME_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_dup(sk) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_copyfunc_type(copyfunc), ossl_check_GENERAL_NAME_freefunc_type(freefunc))) +#define sk_GENERAL_NAME_set_cmp_func(sk, cmp) ((sk_GENERAL_NAME_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_compfunc_type(cmp))) + + +typedef STACK_OF(ACCESS_DESCRIPTION) AUTHORITY_INFO_ACCESS; +typedef STACK_OF(ASN1_OBJECT) EXTENDED_KEY_USAGE; +typedef STACK_OF(ASN1_INTEGER) TLS_FEATURE; +typedef STACK_OF(GENERAL_NAME) GENERAL_NAMES; + +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_NAMES, GENERAL_NAMES, GENERAL_NAMES) +#define sk_GENERAL_NAMES_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_value(sk, idx) ((GENERAL_NAMES *)OPENSSL_sk_value(ossl_check_const_GENERAL_NAMES_sk_type(sk), (idx))) +#define sk_GENERAL_NAMES_new(cmp) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new(ossl_check_GENERAL_NAMES_compfunc_type(cmp))) +#define sk_GENERAL_NAMES_new_null() ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_NAMES_new_reserve(cmp, n) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_NAMES_compfunc_type(cmp), (n))) +#define sk_GENERAL_NAMES_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_NAMES_sk_type(sk), (n)) +#define sk_GENERAL_NAMES_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_delete(sk, i) ((GENERAL_NAMES *)OPENSSL_sk_delete(ossl_check_GENERAL_NAMES_sk_type(sk), (i))) +#define sk_GENERAL_NAMES_delete_ptr(sk, ptr) ((GENERAL_NAMES *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr))) +#define sk_GENERAL_NAMES_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_pop(sk) ((GENERAL_NAMES *)OPENSSL_sk_pop(ossl_check_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_shift(sk) ((GENERAL_NAMES *)OPENSSL_sk_shift(ossl_check_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_NAMES_sk_type(sk),ossl_check_GENERAL_NAMES_freefunc_type(freefunc)) +#define sk_GENERAL_NAMES_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr), (idx)) +#define sk_GENERAL_NAMES_set(sk, idx, ptr) ((GENERAL_NAMES *)OPENSSL_sk_set(ossl_check_GENERAL_NAMES_sk_type(sk), (idx), ossl_check_GENERAL_NAMES_type(ptr))) +#define sk_GENERAL_NAMES_find(sk, ptr) OPENSSL_sk_find(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_NAMES_sk_type(sk), 
ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr), pnum) +#define sk_GENERAL_NAMES_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_dup(sk) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_copyfunc_type(copyfunc), ossl_check_GENERAL_NAMES_freefunc_type(freefunc))) +#define sk_GENERAL_NAMES_set_cmp_func(sk, cmp) ((sk_GENERAL_NAMES_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_compfunc_type(cmp))) + + +typedef struct DIST_POINT_NAME_st { + int type; + union { + GENERAL_NAMES *fullname; + STACK_OF(X509_NAME_ENTRY) *relativename; + } name; +/* If relativename then this contains the full distribution point name */ + X509_NAME *dpname; +} DIST_POINT_NAME; +/* All existing reasons */ +# define CRLDP_ALL_REASONS 0x807f + +# define CRL_REASON_NONE -1 +# define CRL_REASON_UNSPECIFIED 0 +# define CRL_REASON_KEY_COMPROMISE 1 +# define CRL_REASON_CA_COMPROMISE 2 +# define CRL_REASON_AFFILIATION_CHANGED 3 +# define CRL_REASON_SUPERSEDED 4 +# define CRL_REASON_CESSATION_OF_OPERATION 5 +# define CRL_REASON_CERTIFICATE_HOLD 6 +# define CRL_REASON_REMOVE_FROM_CRL 8 +# define CRL_REASON_PRIVILEGE_WITHDRAWN 9 +# define CRL_REASON_AA_COMPROMISE 10 + +struct DIST_POINT_st { + DIST_POINT_NAME *distpoint; + ASN1_BIT_STRING *reasons; + GENERAL_NAMES *CRLissuer; + int dp_reasons; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(DIST_POINT, DIST_POINT, DIST_POINT) +#define sk_DIST_POINT_num(sk) OPENSSL_sk_num(ossl_check_const_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_value(sk, idx) ((DIST_POINT *)OPENSSL_sk_value(ossl_check_const_DIST_POINT_sk_type(sk), (idx))) +#define sk_DIST_POINT_new(cmp) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new(ossl_check_DIST_POINT_compfunc_type(cmp))) +#define sk_DIST_POINT_new_null() ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new_null()) +#define sk_DIST_POINT_new_reserve(cmp, n) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new_reserve(ossl_check_DIST_POINT_compfunc_type(cmp), (n))) +#define sk_DIST_POINT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_DIST_POINT_sk_type(sk), (n)) +#define sk_DIST_POINT_free(sk) OPENSSL_sk_free(ossl_check_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_zero(sk) OPENSSL_sk_zero(ossl_check_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_delete(sk, i) ((DIST_POINT *)OPENSSL_sk_delete(ossl_check_DIST_POINT_sk_type(sk), (i))) +#define sk_DIST_POINT_delete_ptr(sk, ptr) ((DIST_POINT *)OPENSSL_sk_delete_ptr(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr))) +#define sk_DIST_POINT_push(sk, ptr) OPENSSL_sk_push(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_pop(sk) ((DIST_POINT *)OPENSSL_sk_pop(ossl_check_DIST_POINT_sk_type(sk))) +#define sk_DIST_POINT_shift(sk) ((DIST_POINT *)OPENSSL_sk_shift(ossl_check_DIST_POINT_sk_type(sk))) +#define sk_DIST_POINT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_DIST_POINT_sk_type(sk),ossl_check_DIST_POINT_freefunc_type(freefunc)) +#define 
sk_DIST_POINT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr), (idx)) +#define sk_DIST_POINT_set(sk, idx, ptr) ((DIST_POINT *)OPENSSL_sk_set(ossl_check_DIST_POINT_sk_type(sk), (idx), ossl_check_DIST_POINT_type(ptr))) +#define sk_DIST_POINT_find(sk, ptr) OPENSSL_sk_find(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr), pnum) +#define sk_DIST_POINT_sort(sk) OPENSSL_sk_sort(ossl_check_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_dup(sk) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_dup(ossl_check_const_DIST_POINT_sk_type(sk))) +#define sk_DIST_POINT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_deep_copy(ossl_check_const_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_copyfunc_type(copyfunc), ossl_check_DIST_POINT_freefunc_type(freefunc))) +#define sk_DIST_POINT_set_cmp_func(sk, cmp) ((sk_DIST_POINT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_compfunc_type(cmp))) + + +typedef STACK_OF(DIST_POINT) CRL_DIST_POINTS; + +struct AUTHORITY_KEYID_st { + ASN1_OCTET_STRING *keyid; + GENERAL_NAMES *issuer; + ASN1_INTEGER *serial; +}; + +/* Strong extranet structures */ + +typedef struct SXNET_ID_st { + ASN1_INTEGER *zone; + ASN1_OCTET_STRING *user; +} SXNETID; + +SKM_DEFINE_STACK_OF_INTERNAL(SXNETID, SXNETID, SXNETID) +#define sk_SXNETID_num(sk) OPENSSL_sk_num(ossl_check_const_SXNETID_sk_type(sk)) +#define sk_SXNETID_value(sk, idx) ((SXNETID *)OPENSSL_sk_value(ossl_check_const_SXNETID_sk_type(sk), (idx))) +#define sk_SXNETID_new(cmp) ((STACK_OF(SXNETID) *)OPENSSL_sk_new(ossl_check_SXNETID_compfunc_type(cmp))) +#define sk_SXNETID_new_null() ((STACK_OF(SXNETID) *)OPENSSL_sk_new_null()) +#define sk_SXNETID_new_reserve(cmp, n) ((STACK_OF(SXNETID) *)OPENSSL_sk_new_reserve(ossl_check_SXNETID_compfunc_type(cmp), (n))) +#define sk_SXNETID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SXNETID_sk_type(sk), (n)) +#define sk_SXNETID_free(sk) OPENSSL_sk_free(ossl_check_SXNETID_sk_type(sk)) +#define sk_SXNETID_zero(sk) OPENSSL_sk_zero(ossl_check_SXNETID_sk_type(sk)) +#define sk_SXNETID_delete(sk, i) ((SXNETID *)OPENSSL_sk_delete(ossl_check_SXNETID_sk_type(sk), (i))) +#define sk_SXNETID_delete_ptr(sk, ptr) ((SXNETID *)OPENSSL_sk_delete_ptr(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr))) +#define sk_SXNETID_push(sk, ptr) OPENSSL_sk_push(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_pop(sk) ((SXNETID *)OPENSSL_sk_pop(ossl_check_SXNETID_sk_type(sk))) +#define sk_SXNETID_shift(sk) ((SXNETID *)OPENSSL_sk_shift(ossl_check_SXNETID_sk_type(sk))) +#define sk_SXNETID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SXNETID_sk_type(sk),ossl_check_SXNETID_freefunc_type(freefunc)) +#define sk_SXNETID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr), (idx)) +#define sk_SXNETID_set(sk, idx, ptr) ((SXNETID *)OPENSSL_sk_set(ossl_check_SXNETID_sk_type(sk), (idx), ossl_check_SXNETID_type(ptr))) +#define sk_SXNETID_find(sk, ptr) 
OPENSSL_sk_find(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr), pnum) +#define sk_SXNETID_sort(sk) OPENSSL_sk_sort(ossl_check_SXNETID_sk_type(sk)) +#define sk_SXNETID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SXNETID_sk_type(sk)) +#define sk_SXNETID_dup(sk) ((STACK_OF(SXNETID) *)OPENSSL_sk_dup(ossl_check_const_SXNETID_sk_type(sk))) +#define sk_SXNETID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SXNETID) *)OPENSSL_sk_deep_copy(ossl_check_const_SXNETID_sk_type(sk), ossl_check_SXNETID_copyfunc_type(copyfunc), ossl_check_SXNETID_freefunc_type(freefunc))) +#define sk_SXNETID_set_cmp_func(sk, cmp) ((sk_SXNETID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_compfunc_type(cmp))) + + + +typedef struct SXNET_st { + ASN1_INTEGER *version; + STACK_OF(SXNETID) *ids; +} SXNET; + +typedef struct ISSUER_SIGN_TOOL_st { + ASN1_UTF8STRING *signTool; + ASN1_UTF8STRING *cATool; + ASN1_UTF8STRING *signToolCert; + ASN1_UTF8STRING *cAToolCert; +} ISSUER_SIGN_TOOL; + +typedef struct NOTICEREF_st { + ASN1_STRING *organization; + STACK_OF(ASN1_INTEGER) *noticenos; +} NOTICEREF; + +typedef struct USERNOTICE_st { + NOTICEREF *noticeref; + ASN1_STRING *exptext; +} USERNOTICE; + +typedef struct POLICYQUALINFO_st { + ASN1_OBJECT *pqualid; + union { + ASN1_IA5STRING *cpsuri; + USERNOTICE *usernotice; + ASN1_TYPE *other; + } d; +} POLICYQUALINFO; + +SKM_DEFINE_STACK_OF_INTERNAL(POLICYQUALINFO, POLICYQUALINFO, POLICYQUALINFO) +#define sk_POLICYQUALINFO_num(sk) OPENSSL_sk_num(ossl_check_const_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_value(sk, idx) ((POLICYQUALINFO *)OPENSSL_sk_value(ossl_check_const_POLICYQUALINFO_sk_type(sk), (idx))) +#define sk_POLICYQUALINFO_new(cmp) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new(ossl_check_POLICYQUALINFO_compfunc_type(cmp))) +#define sk_POLICYQUALINFO_new_null() ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new_null()) +#define sk_POLICYQUALINFO_new_reserve(cmp, n) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new_reserve(ossl_check_POLICYQUALINFO_compfunc_type(cmp), (n))) +#define sk_POLICYQUALINFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICYQUALINFO_sk_type(sk), (n)) +#define sk_POLICYQUALINFO_free(sk) OPENSSL_sk_free(ossl_check_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_zero(sk) OPENSSL_sk_zero(ossl_check_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_delete(sk, i) ((POLICYQUALINFO *)OPENSSL_sk_delete(ossl_check_POLICYQUALINFO_sk_type(sk), (i))) +#define sk_POLICYQUALINFO_delete_ptr(sk, ptr) ((POLICYQUALINFO *)OPENSSL_sk_delete_ptr(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr))) +#define sk_POLICYQUALINFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_pop(sk) ((POLICYQUALINFO *)OPENSSL_sk_pop(ossl_check_POLICYQUALINFO_sk_type(sk))) +#define sk_POLICYQUALINFO_shift(sk) ((POLICYQUALINFO *)OPENSSL_sk_shift(ossl_check_POLICYQUALINFO_sk_type(sk))) +#define sk_POLICYQUALINFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_POLICYQUALINFO_sk_type(sk),ossl_check_POLICYQUALINFO_freefunc_type(freefunc)) +#define 
sk_POLICYQUALINFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr), (idx)) +#define sk_POLICYQUALINFO_set(sk, idx, ptr) ((POLICYQUALINFO *)OPENSSL_sk_set(ossl_check_POLICYQUALINFO_sk_type(sk), (idx), ossl_check_POLICYQUALINFO_type(ptr))) +#define sk_POLICYQUALINFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr), pnum) +#define sk_POLICYQUALINFO_sort(sk) OPENSSL_sk_sort(ossl_check_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_dup(sk) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_dup(ossl_check_const_POLICYQUALINFO_sk_type(sk))) +#define sk_POLICYQUALINFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_copyfunc_type(copyfunc), ossl_check_POLICYQUALINFO_freefunc_type(freefunc))) +#define sk_POLICYQUALINFO_set_cmp_func(sk, cmp) ((sk_POLICYQUALINFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_compfunc_type(cmp))) + + + +typedef struct POLICYINFO_st { + ASN1_OBJECT *policyid; + STACK_OF(POLICYQUALINFO) *qualifiers; +} POLICYINFO; + +SKM_DEFINE_STACK_OF_INTERNAL(POLICYINFO, POLICYINFO, POLICYINFO) +#define sk_POLICYINFO_num(sk) OPENSSL_sk_num(ossl_check_const_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_value(sk, idx) ((POLICYINFO *)OPENSSL_sk_value(ossl_check_const_POLICYINFO_sk_type(sk), (idx))) +#define sk_POLICYINFO_new(cmp) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new(ossl_check_POLICYINFO_compfunc_type(cmp))) +#define sk_POLICYINFO_new_null() ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new_null()) +#define sk_POLICYINFO_new_reserve(cmp, n) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new_reserve(ossl_check_POLICYINFO_compfunc_type(cmp), (n))) +#define sk_POLICYINFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICYINFO_sk_type(sk), (n)) +#define sk_POLICYINFO_free(sk) OPENSSL_sk_free(ossl_check_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_zero(sk) OPENSSL_sk_zero(ossl_check_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_delete(sk, i) ((POLICYINFO *)OPENSSL_sk_delete(ossl_check_POLICYINFO_sk_type(sk), (i))) +#define sk_POLICYINFO_delete_ptr(sk, ptr) ((POLICYINFO *)OPENSSL_sk_delete_ptr(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr))) +#define sk_POLICYINFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_pop(sk) ((POLICYINFO *)OPENSSL_sk_pop(ossl_check_POLICYINFO_sk_type(sk))) +#define sk_POLICYINFO_shift(sk) ((POLICYINFO *)OPENSSL_sk_shift(ossl_check_POLICYINFO_sk_type(sk))) +#define sk_POLICYINFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_POLICYINFO_sk_type(sk),ossl_check_POLICYINFO_freefunc_type(freefunc)) +#define sk_POLICYINFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr), (idx)) +#define sk_POLICYINFO_set(sk, idx, ptr) ((POLICYINFO 
*)OPENSSL_sk_set(ossl_check_POLICYINFO_sk_type(sk), (idx), ossl_check_POLICYINFO_type(ptr))) +#define sk_POLICYINFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr), pnum) +#define sk_POLICYINFO_sort(sk) OPENSSL_sk_sort(ossl_check_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_dup(sk) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_dup(ossl_check_const_POLICYINFO_sk_type(sk))) +#define sk_POLICYINFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_copyfunc_type(copyfunc), ossl_check_POLICYINFO_freefunc_type(freefunc))) +#define sk_POLICYINFO_set_cmp_func(sk, cmp) ((sk_POLICYINFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_compfunc_type(cmp))) + + +typedef STACK_OF(POLICYINFO) CERTIFICATEPOLICIES; + +typedef struct POLICY_MAPPING_st { + ASN1_OBJECT *issuerDomainPolicy; + ASN1_OBJECT *subjectDomainPolicy; +} POLICY_MAPPING; + +SKM_DEFINE_STACK_OF_INTERNAL(POLICY_MAPPING, POLICY_MAPPING, POLICY_MAPPING) +#define sk_POLICY_MAPPING_num(sk) OPENSSL_sk_num(ossl_check_const_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_value(sk, idx) ((POLICY_MAPPING *)OPENSSL_sk_value(ossl_check_const_POLICY_MAPPING_sk_type(sk), (idx))) +#define sk_POLICY_MAPPING_new(cmp) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new(ossl_check_POLICY_MAPPING_compfunc_type(cmp))) +#define sk_POLICY_MAPPING_new_null() ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new_null()) +#define sk_POLICY_MAPPING_new_reserve(cmp, n) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new_reserve(ossl_check_POLICY_MAPPING_compfunc_type(cmp), (n))) +#define sk_POLICY_MAPPING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICY_MAPPING_sk_type(sk), (n)) +#define sk_POLICY_MAPPING_free(sk) OPENSSL_sk_free(ossl_check_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_zero(sk) OPENSSL_sk_zero(ossl_check_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_delete(sk, i) ((POLICY_MAPPING *)OPENSSL_sk_delete(ossl_check_POLICY_MAPPING_sk_type(sk), (i))) +#define sk_POLICY_MAPPING_delete_ptr(sk, ptr) ((POLICY_MAPPING *)OPENSSL_sk_delete_ptr(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr))) +#define sk_POLICY_MAPPING_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_pop(sk) ((POLICY_MAPPING *)OPENSSL_sk_pop(ossl_check_POLICY_MAPPING_sk_type(sk))) +#define sk_POLICY_MAPPING_shift(sk) ((POLICY_MAPPING *)OPENSSL_sk_shift(ossl_check_POLICY_MAPPING_sk_type(sk))) +#define sk_POLICY_MAPPING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_POLICY_MAPPING_sk_type(sk),ossl_check_POLICY_MAPPING_freefunc_type(freefunc)) +#define sk_POLICY_MAPPING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr), (idx)) +#define sk_POLICY_MAPPING_set(sk, idx, ptr) ((POLICY_MAPPING *)OPENSSL_sk_set(ossl_check_POLICY_MAPPING_sk_type(sk), (idx), 
ossl_check_POLICY_MAPPING_type(ptr))) +#define sk_POLICY_MAPPING_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr), pnum) +#define sk_POLICY_MAPPING_sort(sk) OPENSSL_sk_sort(ossl_check_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_dup(sk) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_dup(ossl_check_const_POLICY_MAPPING_sk_type(sk))) +#define sk_POLICY_MAPPING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_copyfunc_type(copyfunc), ossl_check_POLICY_MAPPING_freefunc_type(freefunc))) +#define sk_POLICY_MAPPING_set_cmp_func(sk, cmp) ((sk_POLICY_MAPPING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_compfunc_type(cmp))) + + +typedef STACK_OF(POLICY_MAPPING) POLICY_MAPPINGS; + +typedef struct GENERAL_SUBTREE_st { + GENERAL_NAME *base; + ASN1_INTEGER *minimum; + ASN1_INTEGER *maximum; +} GENERAL_SUBTREE; + +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_SUBTREE, GENERAL_SUBTREE, GENERAL_SUBTREE) +#define sk_GENERAL_SUBTREE_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_value(sk, idx) ((GENERAL_SUBTREE *)OPENSSL_sk_value(ossl_check_const_GENERAL_SUBTREE_sk_type(sk), (idx))) +#define sk_GENERAL_SUBTREE_new(cmp) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new(ossl_check_GENERAL_SUBTREE_compfunc_type(cmp))) +#define sk_GENERAL_SUBTREE_new_null() ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_SUBTREE_new_reserve(cmp, n) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_SUBTREE_compfunc_type(cmp), (n))) +#define sk_GENERAL_SUBTREE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_SUBTREE_sk_type(sk), (n)) +#define sk_GENERAL_SUBTREE_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_delete(sk, i) ((GENERAL_SUBTREE *)OPENSSL_sk_delete(ossl_check_GENERAL_SUBTREE_sk_type(sk), (i))) +#define sk_GENERAL_SUBTREE_delete_ptr(sk, ptr) ((GENERAL_SUBTREE *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr))) +#define sk_GENERAL_SUBTREE_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_pop(sk) ((GENERAL_SUBTREE *)OPENSSL_sk_pop(ossl_check_GENERAL_SUBTREE_sk_type(sk))) +#define sk_GENERAL_SUBTREE_shift(sk) ((GENERAL_SUBTREE *)OPENSSL_sk_shift(ossl_check_GENERAL_SUBTREE_sk_type(sk))) +#define sk_GENERAL_SUBTREE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_SUBTREE_sk_type(sk),ossl_check_GENERAL_SUBTREE_freefunc_type(freefunc)) +#define sk_GENERAL_SUBTREE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr), (idx)) +#define sk_GENERAL_SUBTREE_set(sk, 
idx, ptr) ((GENERAL_SUBTREE *)OPENSSL_sk_set(ossl_check_GENERAL_SUBTREE_sk_type(sk), (idx), ossl_check_GENERAL_SUBTREE_type(ptr))) +#define sk_GENERAL_SUBTREE_find(sk, ptr) OPENSSL_sk_find(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr), pnum) +#define sk_GENERAL_SUBTREE_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_dup(sk) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_SUBTREE_sk_type(sk))) +#define sk_GENERAL_SUBTREE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_copyfunc_type(copyfunc), ossl_check_GENERAL_SUBTREE_freefunc_type(freefunc))) +#define sk_GENERAL_SUBTREE_set_cmp_func(sk, cmp) ((sk_GENERAL_SUBTREE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_compfunc_type(cmp))) + + +struct NAME_CONSTRAINTS_st { + STACK_OF(GENERAL_SUBTREE) *permittedSubtrees; + STACK_OF(GENERAL_SUBTREE) *excludedSubtrees; +}; + +typedef struct POLICY_CONSTRAINTS_st { + ASN1_INTEGER *requireExplicitPolicy; + ASN1_INTEGER *inhibitPolicyMapping; +} POLICY_CONSTRAINTS; + +/* Proxy certificate structures, see RFC 3820 */ +typedef struct PROXY_POLICY_st { + ASN1_OBJECT *policyLanguage; + ASN1_OCTET_STRING *policy; +} PROXY_POLICY; + +typedef struct PROXY_CERT_INFO_EXTENSION_st { + ASN1_INTEGER *pcPathLengthConstraint; + PROXY_POLICY *proxyPolicy; +} PROXY_CERT_INFO_EXTENSION; + +DECLARE_ASN1_FUNCTIONS(PROXY_POLICY) +DECLARE_ASN1_FUNCTIONS(PROXY_CERT_INFO_EXTENSION) + +struct ISSUING_DIST_POINT_st { + DIST_POINT_NAME *distpoint; + int onlyuser; + int onlyCA; + ASN1_BIT_STRING *onlysomereasons; + int indirectCRL; + int onlyattr; +}; + +/* Values in idp_flags field */ +/* IDP present */ +# define IDP_PRESENT 0x1 +/* IDP values inconsistent */ +# define IDP_INVALID 0x2 +/* onlyuser true */ +# define IDP_ONLYUSER 0x4 +/* onlyCA true */ +# define IDP_ONLYCA 0x8 +/* onlyattr true */ +# define IDP_ONLYATTR 0x10 +/* indirectCRL true */ +# define IDP_INDIRECT 0x20 +/* onlysomereasons present */ +# define IDP_REASONS 0x40 + +# define X509V3_conf_err(val) ERR_add_error_data(6, \ + "section:", (val)->section, \ + ",name:", (val)->name, ",value:", (val)->value) + +# define X509V3_set_ctx_test(ctx) \ + X509V3_set_ctx(ctx, NULL, NULL, NULL, NULL, X509V3_CTX_TEST) +# define X509V3_set_ctx_nodb(ctx) (ctx)->db = NULL; + +# define EXT_BITSTRING(nid, table) { nid, 0, ASN1_ITEM_ref(ASN1_BIT_STRING), \ + 0,0,0,0, \ + 0,0, \ + (X509V3_EXT_I2V)i2v_ASN1_BIT_STRING, \ + (X509V3_EXT_V2I)v2i_ASN1_BIT_STRING, \ + NULL, NULL, \ + table} + +# define EXT_IA5STRING(nid) { nid, 0, ASN1_ITEM_ref(ASN1_IA5STRING), \ + 0,0,0,0, \ + (X509V3_EXT_I2S)i2s_ASN1_IA5STRING, \ + (X509V3_EXT_S2I)s2i_ASN1_IA5STRING, \ + 0,0,0,0, \ + NULL} + +#define EXT_UTF8STRING(nid) { nid, 0, ASN1_ITEM_ref(ASN1_UTF8STRING), \ + 0,0,0,0, \ + (X509V3_EXT_I2S)i2s_ASN1_UTF8STRING, \ + (X509V3_EXT_S2I)s2i_ASN1_UTF8STRING, \ + 0,0,0,0, \ + NULL} + +# define EXT_END { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +/* X509_PURPOSE 
stuff */ + +# define EXFLAG_BCONS 0x1 +# define EXFLAG_KUSAGE 0x2 +# define EXFLAG_XKUSAGE 0x4 +# define EXFLAG_NSCERT 0x8 + +# define EXFLAG_CA 0x10 +# define EXFLAG_SI 0x20 /* self-issued, maybe not self-signed */ +# define EXFLAG_V1 0x40 +# define EXFLAG_INVALID 0x80 +/* EXFLAG_SET is set to indicate that some values have been precomputed */ +# define EXFLAG_SET 0x100 +# define EXFLAG_CRITICAL 0x200 +# define EXFLAG_PROXY 0x400 + +# define EXFLAG_INVALID_POLICY 0x800 +# define EXFLAG_FRESHEST 0x1000 +# define EXFLAG_SS 0x2000 /* cert is apparently self-signed */ + +# define EXFLAG_BCONS_CRITICAL 0x10000 +# define EXFLAG_AKID_CRITICAL 0x20000 +# define EXFLAG_SKID_CRITICAL 0x40000 +# define EXFLAG_SAN_CRITICAL 0x80000 +# define EXFLAG_NO_FINGERPRINT 0x100000 + +# define KU_DIGITAL_SIGNATURE 0x0080 +# define KU_NON_REPUDIATION 0x0040 +# define KU_KEY_ENCIPHERMENT 0x0020 +# define KU_DATA_ENCIPHERMENT 0x0010 +# define KU_KEY_AGREEMENT 0x0008 +# define KU_KEY_CERT_SIGN 0x0004 +# define KU_CRL_SIGN 0x0002 +# define KU_ENCIPHER_ONLY 0x0001 +# define KU_DECIPHER_ONLY 0x8000 + +# define NS_SSL_CLIENT 0x80 +# define NS_SSL_SERVER 0x40 +# define NS_SMIME 0x20 +# define NS_OBJSIGN 0x10 +# define NS_SSL_CA 0x04 +# define NS_SMIME_CA 0x02 +# define NS_OBJSIGN_CA 0x01 +# define NS_ANY_CA (NS_SSL_CA|NS_SMIME_CA|NS_OBJSIGN_CA) + +# define XKU_SSL_SERVER 0x1 +# define XKU_SSL_CLIENT 0x2 +# define XKU_SMIME 0x4 +# define XKU_CODE_SIGN 0x8 +# define XKU_SGC 0x10 /* Netscape or MS Server-Gated Crypto */ +# define XKU_OCSP_SIGN 0x20 +# define XKU_TIMESTAMP 0x40 +# define XKU_DVCS 0x80 +# define XKU_ANYEKU 0x100 + +# define X509_PURPOSE_DYNAMIC 0x1 +# define X509_PURPOSE_DYNAMIC_NAME 0x2 + +typedef struct x509_purpose_st { + int purpose; + int trust; /* Default trust ID */ + int flags; + int (*check_purpose) (const struct x509_purpose_st *, const X509 *, int); + char *name; + char *sname; + void *usr_data; +} X509_PURPOSE; + +SKM_DEFINE_STACK_OF_INTERNAL(X509_PURPOSE, X509_PURPOSE, X509_PURPOSE) +#define sk_X509_PURPOSE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_value(sk, idx) ((X509_PURPOSE *)OPENSSL_sk_value(ossl_check_const_X509_PURPOSE_sk_type(sk), (idx))) +#define sk_X509_PURPOSE_new(cmp) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new(ossl_check_X509_PURPOSE_compfunc_type(cmp))) +#define sk_X509_PURPOSE_new_null() ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new_null()) +#define sk_X509_PURPOSE_new_reserve(cmp, n) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new_reserve(ossl_check_X509_PURPOSE_compfunc_type(cmp), (n))) +#define sk_X509_PURPOSE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_PURPOSE_sk_type(sk), (n)) +#define sk_X509_PURPOSE_free(sk) OPENSSL_sk_free(ossl_check_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_delete(sk, i) ((X509_PURPOSE *)OPENSSL_sk_delete(ossl_check_X509_PURPOSE_sk_type(sk), (i))) +#define sk_X509_PURPOSE_delete_ptr(sk, ptr) ((X509_PURPOSE *)OPENSSL_sk_delete_ptr(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr))) +#define sk_X509_PURPOSE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_pop(sk) ((X509_PURPOSE *)OPENSSL_sk_pop(ossl_check_X509_PURPOSE_sk_type(sk))) +#define sk_X509_PURPOSE_shift(sk) ((X509_PURPOSE 
*)OPENSSL_sk_shift(ossl_check_X509_PURPOSE_sk_type(sk))) +#define sk_X509_PURPOSE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_PURPOSE_sk_type(sk),ossl_check_X509_PURPOSE_freefunc_type(freefunc)) +#define sk_X509_PURPOSE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr), (idx)) +#define sk_X509_PURPOSE_set(sk, idx, ptr) ((X509_PURPOSE *)OPENSSL_sk_set(ossl_check_X509_PURPOSE_sk_type(sk), (idx), ossl_check_X509_PURPOSE_type(ptr))) +#define sk_X509_PURPOSE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr), pnum) +#define sk_X509_PURPOSE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_dup(sk) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_dup(ossl_check_const_X509_PURPOSE_sk_type(sk))) +#define sk_X509_PURPOSE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_copyfunc_type(copyfunc), ossl_check_X509_PURPOSE_freefunc_type(freefunc))) +#define sk_X509_PURPOSE_set_cmp_func(sk, cmp) ((sk_X509_PURPOSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_compfunc_type(cmp))) + + + +# define X509_PURPOSE_SSL_CLIENT 1 +# define X509_PURPOSE_SSL_SERVER 2 +# define X509_PURPOSE_NS_SSL_SERVER 3 +# define X509_PURPOSE_SMIME_SIGN 4 +# define X509_PURPOSE_SMIME_ENCRYPT 5 +# define X509_PURPOSE_CRL_SIGN 6 +# define X509_PURPOSE_ANY 7 +# define X509_PURPOSE_OCSP_HELPER 8 +# define X509_PURPOSE_TIMESTAMP_SIGN 9 +# define X509_PURPOSE_CODE_SIGN 10 + +# define X509_PURPOSE_MIN 1 +# define X509_PURPOSE_MAX 10 + +/* Flags for X509V3_EXT_print() */ + +# define X509V3_EXT_UNKNOWN_MASK (0xfL << 16) +/* Return error for unknown extensions */ +# define X509V3_EXT_DEFAULT 0 +/* Print error for unknown extensions */ +# define X509V3_EXT_ERROR_UNKNOWN (1L << 16) +/* ASN1 parse unknown extensions */ +# define X509V3_EXT_PARSE_UNKNOWN (2L << 16) +/* BIO_dump unknown extensions */ +# define X509V3_EXT_DUMP_UNKNOWN (3L << 16) + +/* Flags for X509V3_add1_i2d */ + +# define X509V3_ADD_OP_MASK 0xfL +# define X509V3_ADD_DEFAULT 0L +# define X509V3_ADD_APPEND 1L +# define X509V3_ADD_REPLACE 2L +# define X509V3_ADD_REPLACE_EXISTING 3L +# define X509V3_ADD_KEEP_EXISTING 4L +# define X509V3_ADD_DELETE 5L +# define X509V3_ADD_SILENT 0x10 + +DECLARE_ASN1_FUNCTIONS(BASIC_CONSTRAINTS) + +DECLARE_ASN1_FUNCTIONS(SXNET) +DECLARE_ASN1_FUNCTIONS(SXNETID) + +DECLARE_ASN1_FUNCTIONS(ISSUER_SIGN_TOOL) + +int SXNET_add_id_asc(SXNET **psx, const char *zone, const char *user, int userlen); +int SXNET_add_id_ulong(SXNET **psx, unsigned long lzone, const char *user, + int userlen); +int SXNET_add_id_INTEGER(SXNET **psx, ASN1_INTEGER *izone, const char *user, + int userlen); + +ASN1_OCTET_STRING *SXNET_get_id_asc(SXNET *sx, const char *zone); +ASN1_OCTET_STRING *SXNET_get_id_ulong(SXNET *sx, unsigned long lzone); +ASN1_OCTET_STRING *SXNET_get_id_INTEGER(SXNET *sx, ASN1_INTEGER *zone); + +DECLARE_ASN1_FUNCTIONS(AUTHORITY_KEYID) + +DECLARE_ASN1_FUNCTIONS(PKEY_USAGE_PERIOD) + 
+DECLARE_ASN1_FUNCTIONS(GENERAL_NAME) +DECLARE_ASN1_DUP_FUNCTION(GENERAL_NAME) +int GENERAL_NAME_cmp(GENERAL_NAME *a, GENERAL_NAME *b); + +ASN1_BIT_STRING *v2i_ASN1_BIT_STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, + STACK_OF(CONF_VALUE) *nval); +STACK_OF(CONF_VALUE) *i2v_ASN1_BIT_STRING(X509V3_EXT_METHOD *method, + ASN1_BIT_STRING *bits, + STACK_OF(CONF_VALUE) *extlist); +char *i2s_ASN1_IA5STRING(X509V3_EXT_METHOD *method, ASN1_IA5STRING *ia5); +ASN1_IA5STRING *s2i_ASN1_IA5STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, const char *str); +char *i2s_ASN1_UTF8STRING(X509V3_EXT_METHOD *method, ASN1_UTF8STRING *utf8); +ASN1_UTF8STRING *s2i_ASN1_UTF8STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, const char *str); + +STACK_OF(CONF_VALUE) *i2v_GENERAL_NAME(X509V3_EXT_METHOD *method, + GENERAL_NAME *gen, + STACK_OF(CONF_VALUE) *ret); +int GENERAL_NAME_print(BIO *out, GENERAL_NAME *gen); + +DECLARE_ASN1_FUNCTIONS(GENERAL_NAMES) + +STACK_OF(CONF_VALUE) *i2v_GENERAL_NAMES(X509V3_EXT_METHOD *method, + GENERAL_NAMES *gen, + STACK_OF(CONF_VALUE) *extlist); +GENERAL_NAMES *v2i_GENERAL_NAMES(const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *nval); + +DECLARE_ASN1_FUNCTIONS(OTHERNAME) +DECLARE_ASN1_FUNCTIONS(EDIPARTYNAME) +int OTHERNAME_cmp(OTHERNAME *a, OTHERNAME *b); +void GENERAL_NAME_set0_value(GENERAL_NAME *a, int type, void *value); +void *GENERAL_NAME_get0_value(const GENERAL_NAME *a, int *ptype); +int GENERAL_NAME_set0_othername(GENERAL_NAME *gen, + ASN1_OBJECT *oid, ASN1_TYPE *value); +int GENERAL_NAME_get0_otherName(const GENERAL_NAME *gen, + ASN1_OBJECT **poid, ASN1_TYPE **pvalue); + +char *i2s_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, + const ASN1_OCTET_STRING *ia5); +ASN1_OCTET_STRING *s2i_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, const char *str); + +DECLARE_ASN1_FUNCTIONS(EXTENDED_KEY_USAGE) +int i2a_ACCESS_DESCRIPTION(BIO *bp, const ACCESS_DESCRIPTION *a); + +DECLARE_ASN1_ALLOC_FUNCTIONS(TLS_FEATURE) + +DECLARE_ASN1_FUNCTIONS(CERTIFICATEPOLICIES) +DECLARE_ASN1_FUNCTIONS(POLICYINFO) +DECLARE_ASN1_FUNCTIONS(POLICYQUALINFO) +DECLARE_ASN1_FUNCTIONS(USERNOTICE) +DECLARE_ASN1_FUNCTIONS(NOTICEREF) + +DECLARE_ASN1_FUNCTIONS(CRL_DIST_POINTS) +DECLARE_ASN1_FUNCTIONS(DIST_POINT) +DECLARE_ASN1_FUNCTIONS(DIST_POINT_NAME) +DECLARE_ASN1_FUNCTIONS(ISSUING_DIST_POINT) + +int DIST_POINT_set_dpname(DIST_POINT_NAME *dpn, const X509_NAME *iname); + +int NAME_CONSTRAINTS_check(X509 *x, NAME_CONSTRAINTS *nc); +int NAME_CONSTRAINTS_check_CN(X509 *x, NAME_CONSTRAINTS *nc); + +DECLARE_ASN1_FUNCTIONS(ACCESS_DESCRIPTION) +DECLARE_ASN1_FUNCTIONS(AUTHORITY_INFO_ACCESS) + +DECLARE_ASN1_ITEM(POLICY_MAPPING) +DECLARE_ASN1_ALLOC_FUNCTIONS(POLICY_MAPPING) +DECLARE_ASN1_ITEM(POLICY_MAPPINGS) + +DECLARE_ASN1_ITEM(GENERAL_SUBTREE) +DECLARE_ASN1_ALLOC_FUNCTIONS(GENERAL_SUBTREE) + +DECLARE_ASN1_ITEM(NAME_CONSTRAINTS) +DECLARE_ASN1_ALLOC_FUNCTIONS(NAME_CONSTRAINTS) + +DECLARE_ASN1_ALLOC_FUNCTIONS(POLICY_CONSTRAINTS) +DECLARE_ASN1_ITEM(POLICY_CONSTRAINTS) + +GENERAL_NAME *a2i_GENERAL_NAME(GENERAL_NAME *out, + const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, int gen_type, + const char *value, int is_nc); + +# ifdef OPENSSL_CONF_H +GENERAL_NAME *v2i_GENERAL_NAME(const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, CONF_VALUE *cnf); +GENERAL_NAME *v2i_GENERAL_NAME_ex(GENERAL_NAME *out, + const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, CONF_VALUE *cnf, + int is_nc); + +void X509V3_conf_free(CONF_VALUE *val); + +X509_EXTENSION *X509V3_EXT_nconf_nid(CONF *conf, 
X509V3_CTX *ctx, int ext_nid, + const char *value); +X509_EXTENSION *X509V3_EXT_nconf(CONF *conf, X509V3_CTX *ctx, const char *name, + const char *value); +int X509V3_EXT_add_nconf_sk(CONF *conf, X509V3_CTX *ctx, const char *section, + STACK_OF(X509_EXTENSION) **sk); +int X509V3_EXT_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section, + X509 *cert); +int X509V3_EXT_REQ_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section, + X509_REQ *req); +int X509V3_EXT_CRL_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section, + X509_CRL *crl); + +X509_EXTENSION *X509V3_EXT_conf_nid(LHASH_OF(CONF_VALUE) *conf, + X509V3_CTX *ctx, int ext_nid, + const char *value); +X509_EXTENSION *X509V3_EXT_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *name, const char *value); +int X509V3_EXT_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *section, X509 *cert); +int X509V3_EXT_REQ_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *section, X509_REQ *req); +int X509V3_EXT_CRL_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *section, X509_CRL *crl); + +int X509V3_add_value_bool_nf(const char *name, int asn1_bool, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_get_value_bool(const CONF_VALUE *value, int *asn1_bool); +int X509V3_get_value_int(const CONF_VALUE *value, ASN1_INTEGER **aint); +void X509V3_set_nconf(X509V3_CTX *ctx, CONF *conf); +void X509V3_set_conf_lhash(X509V3_CTX *ctx, LHASH_OF(CONF_VALUE) *lhash); +# endif + +char *X509V3_get_string(X509V3_CTX *ctx, const char *name, const char *section); +STACK_OF(CONF_VALUE) *X509V3_get_section(X509V3_CTX *ctx, const char *section); +void X509V3_string_free(X509V3_CTX *ctx, char *str); +void X509V3_section_free(X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *section); +void X509V3_set_ctx(X509V3_CTX *ctx, X509 *issuer, X509 *subject, + X509_REQ *req, X509_CRL *crl, int flags); +/* For API backward compatibility, this is separate from X509V3_set_ctx(): */ +int X509V3_set_issuer_pkey(X509V3_CTX *ctx, EVP_PKEY *pkey); + +int X509V3_add_value(const char *name, const char *value, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_add_value_uchar(const char *name, const unsigned char *value, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_add_value_bool(const char *name, int asn1_bool, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_add_value_int(const char *name, const ASN1_INTEGER *aint, + STACK_OF(CONF_VALUE) **extlist); +char *i2s_ASN1_INTEGER(X509V3_EXT_METHOD *meth, const ASN1_INTEGER *aint); +ASN1_INTEGER *s2i_ASN1_INTEGER(X509V3_EXT_METHOD *meth, const char *value); +char *i2s_ASN1_ENUMERATED(X509V3_EXT_METHOD *meth, const ASN1_ENUMERATED *aint); +char *i2s_ASN1_ENUMERATED_TABLE(X509V3_EXT_METHOD *meth, + const ASN1_ENUMERATED *aint); +int X509V3_EXT_add(X509V3_EXT_METHOD *ext); +int X509V3_EXT_add_list(X509V3_EXT_METHOD *extlist); +int X509V3_EXT_add_alias(int nid_to, int nid_from); +void X509V3_EXT_cleanup(void); + +const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext); +const X509V3_EXT_METHOD *X509V3_EXT_get_nid(int nid); +int X509V3_add_standard_extensions(void); +STACK_OF(CONF_VALUE) *X509V3_parse_list(const char *line); +void *X509V3_EXT_d2i(X509_EXTENSION *ext); +void *X509V3_get_d2i(const STACK_OF(X509_EXTENSION) *x, int nid, int *crit, + int *idx); + +X509_EXTENSION *X509V3_EXT_i2d(int ext_nid, int crit, void *ext_struc); +int X509V3_add1_i2d(STACK_OF(X509_EXTENSION) **x, int nid, void *value, + int crit, unsigned long flags); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +/* The new 
declarations are in crypto.h, but the old ones were here. */ +# define hex_to_string OPENSSL_buf2hexstr +# define string_to_hex OPENSSL_hexstr2buf +#endif + +void X509V3_EXT_val_prn(BIO *out, STACK_OF(CONF_VALUE) *val, int indent, + int ml); +int X509V3_EXT_print(BIO *out, X509_EXTENSION *ext, unsigned long flag, + int indent); +#ifndef OPENSSL_NO_STDIO +int X509V3_EXT_print_fp(FILE *out, X509_EXTENSION *ext, int flag, int indent); +#endif +int X509V3_extensions_print(BIO *out, const char *title, + const STACK_OF(X509_EXTENSION) *exts, + unsigned long flag, int indent); + +int X509_check_ca(X509 *x); +int X509_check_purpose(X509 *x, int id, int ca); +int X509_supported_extension(X509_EXTENSION *ex); +int X509_PURPOSE_set(int *p, int purpose); +int X509_check_issued(X509 *issuer, X509 *subject); +int X509_check_akid(const X509 *issuer, const AUTHORITY_KEYID *akid); +void X509_set_proxy_flag(X509 *x); +void X509_set_proxy_pathlen(X509 *x, long l); +long X509_get_proxy_pathlen(X509 *x); + +uint32_t X509_get_extension_flags(X509 *x); +uint32_t X509_get_key_usage(X509 *x); +uint32_t X509_get_extended_key_usage(X509 *x); +const ASN1_OCTET_STRING *X509_get0_subject_key_id(X509 *x); +const ASN1_OCTET_STRING *X509_get0_authority_key_id(X509 *x); +const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x); +const ASN1_INTEGER *X509_get0_authority_serial(X509 *x); + +int X509_PURPOSE_get_count(void); +X509_PURPOSE *X509_PURPOSE_get0(int idx); +int X509_PURPOSE_get_by_sname(const char *sname); +int X509_PURPOSE_get_by_id(int id); +int X509_PURPOSE_add(int id, int trust, int flags, + int (*ck) (const X509_PURPOSE *, const X509 *, int), + const char *name, const char *sname, void *arg); +char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp); +char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp); +int X509_PURPOSE_get_trust(const X509_PURPOSE *xp); +void X509_PURPOSE_cleanup(void); +int X509_PURPOSE_get_id(const X509_PURPOSE *); + +STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x); +STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x); +void X509_email_free(STACK_OF(OPENSSL_STRING) *sk); +STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x); +/* Flags for X509_check_* functions */ + +/* + * Always check subject name for host match even if subject alt names present + */ +# define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT 0x1 +/* Disable wildcard matching for dnsName fields and common name. */ +# define X509_CHECK_FLAG_NO_WILDCARDS 0x2 +/* Wildcards must not match a partial label. */ +# define X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS 0x4 +/* Allow (non-partial) wildcards to match multiple labels. */ +# define X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS 0x8 +/* Constraint verifier subdomain patterns to match a single labels. */ +# define X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS 0x10 +/* Never check the subject CN */ +# define X509_CHECK_FLAG_NEVER_CHECK_SUBJECT 0x20 +/* + * Match reference identifiers starting with "." to any sub-domain. + * This is a non-public flag, turned on implicitly when the subject + * reference identity is a DNS name. 
+ */ +# define _X509_CHECK_FLAG_DOT_SUBDOMAINS 0x8000 + +int X509_check_host(X509 *x, const char *chk, size_t chklen, + unsigned int flags, char **peername); +int X509_check_email(X509 *x, const char *chk, size_t chklen, + unsigned int flags); +int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen, + unsigned int flags); +int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags); + +ASN1_OCTET_STRING *a2i_IPADDRESS(const char *ipasc); +ASN1_OCTET_STRING *a2i_IPADDRESS_NC(const char *ipasc); +int X509V3_NAME_from_section(X509_NAME *nm, STACK_OF(CONF_VALUE) *dn_sk, + unsigned long chtype); + +void X509_POLICY_NODE_print(BIO *out, X509_POLICY_NODE *node, int indent); +SKM_DEFINE_STACK_OF_INTERNAL(X509_POLICY_NODE, X509_POLICY_NODE, X509_POLICY_NODE) +#define sk_X509_POLICY_NODE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_value(sk, idx) ((X509_POLICY_NODE *)OPENSSL_sk_value(ossl_check_const_X509_POLICY_NODE_sk_type(sk), (idx))) +#define sk_X509_POLICY_NODE_new(cmp) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new(ossl_check_X509_POLICY_NODE_compfunc_type(cmp))) +#define sk_X509_POLICY_NODE_new_null() ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new_null()) +#define sk_X509_POLICY_NODE_new_reserve(cmp, n) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new_reserve(ossl_check_X509_POLICY_NODE_compfunc_type(cmp), (n))) +#define sk_X509_POLICY_NODE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_POLICY_NODE_sk_type(sk), (n)) +#define sk_X509_POLICY_NODE_free(sk) OPENSSL_sk_free(ossl_check_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_delete(sk, i) ((X509_POLICY_NODE *)OPENSSL_sk_delete(ossl_check_X509_POLICY_NODE_sk_type(sk), (i))) +#define sk_X509_POLICY_NODE_delete_ptr(sk, ptr) ((X509_POLICY_NODE *)OPENSSL_sk_delete_ptr(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr))) +#define sk_X509_POLICY_NODE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_pop(sk) ((X509_POLICY_NODE *)OPENSSL_sk_pop(ossl_check_X509_POLICY_NODE_sk_type(sk))) +#define sk_X509_POLICY_NODE_shift(sk) ((X509_POLICY_NODE *)OPENSSL_sk_shift(ossl_check_X509_POLICY_NODE_sk_type(sk))) +#define sk_X509_POLICY_NODE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_POLICY_NODE_sk_type(sk),ossl_check_X509_POLICY_NODE_freefunc_type(freefunc)) +#define sk_X509_POLICY_NODE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr), (idx)) +#define sk_X509_POLICY_NODE_set(sk, idx, ptr) ((X509_POLICY_NODE *)OPENSSL_sk_set(ossl_check_X509_POLICY_NODE_sk_type(sk), (idx), ossl_check_X509_POLICY_NODE_type(ptr))) +#define sk_X509_POLICY_NODE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr), pnum) +#define sk_X509_POLICY_NODE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_POLICY_NODE_sk_type(sk)) 
+#define sk_X509_POLICY_NODE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_dup(sk) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_dup(ossl_check_const_X509_POLICY_NODE_sk_type(sk))) +#define sk_X509_POLICY_NODE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_copyfunc_type(copyfunc), ossl_check_X509_POLICY_NODE_freefunc_type(freefunc))) +#define sk_X509_POLICY_NODE_set_cmp_func(sk, cmp) ((sk_X509_POLICY_NODE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_compfunc_type(cmp))) + + + +#ifndef OPENSSL_NO_RFC3779 +typedef struct ASRange_st { + ASN1_INTEGER *min, *max; +} ASRange; + +# define ASIdOrRange_id 0 +# define ASIdOrRange_range 1 + +typedef struct ASIdOrRange_st { + int type; + union { + ASN1_INTEGER *id; + ASRange *range; + } u; +} ASIdOrRange; + +SKM_DEFINE_STACK_OF_INTERNAL(ASIdOrRange, ASIdOrRange, ASIdOrRange) +#define sk_ASIdOrRange_num(sk) OPENSSL_sk_num(ossl_check_const_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_value(sk, idx) ((ASIdOrRange *)OPENSSL_sk_value(ossl_check_const_ASIdOrRange_sk_type(sk), (idx))) +#define sk_ASIdOrRange_new(cmp) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new(ossl_check_ASIdOrRange_compfunc_type(cmp))) +#define sk_ASIdOrRange_new_null() ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new_null()) +#define sk_ASIdOrRange_new_reserve(cmp, n) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new_reserve(ossl_check_ASIdOrRange_compfunc_type(cmp), (n))) +#define sk_ASIdOrRange_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASIdOrRange_sk_type(sk), (n)) +#define sk_ASIdOrRange_free(sk) OPENSSL_sk_free(ossl_check_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_zero(sk) OPENSSL_sk_zero(ossl_check_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_delete(sk, i) ((ASIdOrRange *)OPENSSL_sk_delete(ossl_check_ASIdOrRange_sk_type(sk), (i))) +#define sk_ASIdOrRange_delete_ptr(sk, ptr) ((ASIdOrRange *)OPENSSL_sk_delete_ptr(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr))) +#define sk_ASIdOrRange_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_pop(sk) ((ASIdOrRange *)OPENSSL_sk_pop(ossl_check_ASIdOrRange_sk_type(sk))) +#define sk_ASIdOrRange_shift(sk) ((ASIdOrRange *)OPENSSL_sk_shift(ossl_check_ASIdOrRange_sk_type(sk))) +#define sk_ASIdOrRange_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASIdOrRange_sk_type(sk),ossl_check_ASIdOrRange_freefunc_type(freefunc)) +#define sk_ASIdOrRange_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr), (idx)) +#define sk_ASIdOrRange_set(sk, idx, ptr) ((ASIdOrRange *)OPENSSL_sk_set(ossl_check_ASIdOrRange_sk_type(sk), (idx), ossl_check_ASIdOrRange_type(ptr))) +#define sk_ASIdOrRange_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr), pnum) +#define sk_ASIdOrRange_sort(sk) OPENSSL_sk_sort(ossl_check_ASIdOrRange_sk_type(sk)) +#define 
sk_ASIdOrRange_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_dup(sk) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_dup(ossl_check_const_ASIdOrRange_sk_type(sk))) +#define sk_ASIdOrRange_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_deep_copy(ossl_check_const_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_copyfunc_type(copyfunc), ossl_check_ASIdOrRange_freefunc_type(freefunc))) +#define sk_ASIdOrRange_set_cmp_func(sk, cmp) ((sk_ASIdOrRange_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_compfunc_type(cmp))) + + +typedef STACK_OF(ASIdOrRange) ASIdOrRanges; + +# define ASIdentifierChoice_inherit 0 +# define ASIdentifierChoice_asIdsOrRanges 1 + +typedef struct ASIdentifierChoice_st { + int type; + union { + ASN1_NULL *inherit; + ASIdOrRanges *asIdsOrRanges; + } u; +} ASIdentifierChoice; + +typedef struct ASIdentifiers_st { + ASIdentifierChoice *asnum, *rdi; +} ASIdentifiers; + +DECLARE_ASN1_FUNCTIONS(ASRange) +DECLARE_ASN1_FUNCTIONS(ASIdOrRange) +DECLARE_ASN1_FUNCTIONS(ASIdentifierChoice) +DECLARE_ASN1_FUNCTIONS(ASIdentifiers) + +typedef struct IPAddressRange_st { + ASN1_BIT_STRING *min, *max; +} IPAddressRange; + +# define IPAddressOrRange_addressPrefix 0 +# define IPAddressOrRange_addressRange 1 + +typedef struct IPAddressOrRange_st { + int type; + union { + ASN1_BIT_STRING *addressPrefix; + IPAddressRange *addressRange; + } u; +} IPAddressOrRange; + +SKM_DEFINE_STACK_OF_INTERNAL(IPAddressOrRange, IPAddressOrRange, IPAddressOrRange) +#define sk_IPAddressOrRange_num(sk) OPENSSL_sk_num(ossl_check_const_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_value(sk, idx) ((IPAddressOrRange *)OPENSSL_sk_value(ossl_check_const_IPAddressOrRange_sk_type(sk), (idx))) +#define sk_IPAddressOrRange_new(cmp) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new(ossl_check_IPAddressOrRange_compfunc_type(cmp))) +#define sk_IPAddressOrRange_new_null() ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new_null()) +#define sk_IPAddressOrRange_new_reserve(cmp, n) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new_reserve(ossl_check_IPAddressOrRange_compfunc_type(cmp), (n))) +#define sk_IPAddressOrRange_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_IPAddressOrRange_sk_type(sk), (n)) +#define sk_IPAddressOrRange_free(sk) OPENSSL_sk_free(ossl_check_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_zero(sk) OPENSSL_sk_zero(ossl_check_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_delete(sk, i) ((IPAddressOrRange *)OPENSSL_sk_delete(ossl_check_IPAddressOrRange_sk_type(sk), (i))) +#define sk_IPAddressOrRange_delete_ptr(sk, ptr) ((IPAddressOrRange *)OPENSSL_sk_delete_ptr(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr))) +#define sk_IPAddressOrRange_push(sk, ptr) OPENSSL_sk_push(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_pop(sk) ((IPAddressOrRange *)OPENSSL_sk_pop(ossl_check_IPAddressOrRange_sk_type(sk))) +#define sk_IPAddressOrRange_shift(sk) ((IPAddressOrRange *)OPENSSL_sk_shift(ossl_check_IPAddressOrRange_sk_type(sk))) +#define sk_IPAddressOrRange_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_IPAddressOrRange_sk_type(sk),ossl_check_IPAddressOrRange_freefunc_type(freefunc)) +#define sk_IPAddressOrRange_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr), (idx)) +#define sk_IPAddressOrRange_set(sk, idx, ptr) ((IPAddressOrRange *)OPENSSL_sk_set(ossl_check_IPAddressOrRange_sk_type(sk), (idx), ossl_check_IPAddressOrRange_type(ptr))) +#define sk_IPAddressOrRange_find(sk, ptr) OPENSSL_sk_find(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr), pnum) +#define sk_IPAddressOrRange_sort(sk) OPENSSL_sk_sort(ossl_check_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_dup(sk) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_dup(ossl_check_const_IPAddressOrRange_sk_type(sk))) +#define sk_IPAddressOrRange_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_deep_copy(ossl_check_const_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_copyfunc_type(copyfunc), ossl_check_IPAddressOrRange_freefunc_type(freefunc))) +#define sk_IPAddressOrRange_set_cmp_func(sk, cmp) ((sk_IPAddressOrRange_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_compfunc_type(cmp))) + + +typedef STACK_OF(IPAddressOrRange) IPAddressOrRanges; + +# define IPAddressChoice_inherit 0 +# define IPAddressChoice_addressesOrRanges 1 + +typedef struct IPAddressChoice_st { + int type; + union { + ASN1_NULL *inherit; + IPAddressOrRanges *addressesOrRanges; + } u; +} IPAddressChoice; + +typedef struct IPAddressFamily_st { + ASN1_OCTET_STRING *addressFamily; + IPAddressChoice *ipAddressChoice; +} IPAddressFamily; + +SKM_DEFINE_STACK_OF_INTERNAL(IPAddressFamily, IPAddressFamily, IPAddressFamily) +#define sk_IPAddressFamily_num(sk) OPENSSL_sk_num(ossl_check_const_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_value(sk, idx) ((IPAddressFamily *)OPENSSL_sk_value(ossl_check_const_IPAddressFamily_sk_type(sk), (idx))) +#define sk_IPAddressFamily_new(cmp) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new(ossl_check_IPAddressFamily_compfunc_type(cmp))) +#define sk_IPAddressFamily_new_null() ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new_null()) +#define sk_IPAddressFamily_new_reserve(cmp, n) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new_reserve(ossl_check_IPAddressFamily_compfunc_type(cmp), (n))) +#define sk_IPAddressFamily_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_IPAddressFamily_sk_type(sk), (n)) +#define sk_IPAddressFamily_free(sk) OPENSSL_sk_free(ossl_check_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_zero(sk) OPENSSL_sk_zero(ossl_check_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_delete(sk, i) ((IPAddressFamily *)OPENSSL_sk_delete(ossl_check_IPAddressFamily_sk_type(sk), (i))) +#define sk_IPAddressFamily_delete_ptr(sk, ptr) ((IPAddressFamily *)OPENSSL_sk_delete_ptr(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr))) +#define sk_IPAddressFamily_push(sk, ptr) OPENSSL_sk_push(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_pop(sk) ((IPAddressFamily 
*)OPENSSL_sk_pop(ossl_check_IPAddressFamily_sk_type(sk))) +#define sk_IPAddressFamily_shift(sk) ((IPAddressFamily *)OPENSSL_sk_shift(ossl_check_IPAddressFamily_sk_type(sk))) +#define sk_IPAddressFamily_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_IPAddressFamily_sk_type(sk),ossl_check_IPAddressFamily_freefunc_type(freefunc)) +#define sk_IPAddressFamily_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr), (idx)) +#define sk_IPAddressFamily_set(sk, idx, ptr) ((IPAddressFamily *)OPENSSL_sk_set(ossl_check_IPAddressFamily_sk_type(sk), (idx), ossl_check_IPAddressFamily_type(ptr))) +#define sk_IPAddressFamily_find(sk, ptr) OPENSSL_sk_find(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr), pnum) +#define sk_IPAddressFamily_sort(sk) OPENSSL_sk_sort(ossl_check_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_dup(sk) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_dup(ossl_check_const_IPAddressFamily_sk_type(sk))) +#define sk_IPAddressFamily_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_deep_copy(ossl_check_const_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_copyfunc_type(copyfunc), ossl_check_IPAddressFamily_freefunc_type(freefunc))) +#define sk_IPAddressFamily_set_cmp_func(sk, cmp) ((sk_IPAddressFamily_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_compfunc_type(cmp))) + + + +typedef STACK_OF(IPAddressFamily) IPAddrBlocks; + +DECLARE_ASN1_FUNCTIONS(IPAddressRange) +DECLARE_ASN1_FUNCTIONS(IPAddressOrRange) +DECLARE_ASN1_FUNCTIONS(IPAddressChoice) +DECLARE_ASN1_FUNCTIONS(IPAddressFamily) + +/* + * API tag for elements of the ASIdentifer SEQUENCE. + */ +# define V3_ASID_ASNUM 0 +# define V3_ASID_RDI 1 + +/* + * AFI values, assigned by IANA. It'd be nice to make the AFI + * handling code totally generic, but there are too many little things + * that would need to be defined for other address families for it to + * be worth the trouble. + */ +# define IANA_AFI_IPV4 1 +# define IANA_AFI_IPV6 2 + +/* + * Utilities to construct and extract values from RFC3779 extensions, + * since some of the encodings (particularly for IP address prefixes + * and ranges) are a bit tedious to work with directly. + */ +int X509v3_asid_add_inherit(ASIdentifiers *asid, int which); +int X509v3_asid_add_id_or_range(ASIdentifiers *asid, int which, + ASN1_INTEGER *min, ASN1_INTEGER *max); +int X509v3_addr_add_inherit(IPAddrBlocks *addr, + const unsigned afi, const unsigned *safi); +int X509v3_addr_add_prefix(IPAddrBlocks *addr, + const unsigned afi, const unsigned *safi, + unsigned char *a, const int prefixlen); +int X509v3_addr_add_range(IPAddrBlocks *addr, + const unsigned afi, const unsigned *safi, + unsigned char *min, unsigned char *max); +unsigned X509v3_addr_get_afi(const IPAddressFamily *f); +int X509v3_addr_get_range(IPAddressOrRange *aor, const unsigned afi, + unsigned char *min, unsigned char *max, + const int length); + +/* + * Canonical forms. 
+ */ +int X509v3_asid_is_canonical(ASIdentifiers *asid); +int X509v3_addr_is_canonical(IPAddrBlocks *addr); +int X509v3_asid_canonize(ASIdentifiers *asid); +int X509v3_addr_canonize(IPAddrBlocks *addr); + +/* + * Tests for inheritance and containment. + */ +int X509v3_asid_inherits(ASIdentifiers *asid); +int X509v3_addr_inherits(IPAddrBlocks *addr); +int X509v3_asid_subset(ASIdentifiers *a, ASIdentifiers *b); +int X509v3_addr_subset(IPAddrBlocks *a, IPAddrBlocks *b); + +/* + * Check whether RFC 3779 extensions nest properly in chains. + */ +int X509v3_asid_validate_path(X509_STORE_CTX *); +int X509v3_addr_validate_path(X509_STORE_CTX *); +int X509v3_asid_validate_resource_set(STACK_OF(X509) *chain, + ASIdentifiers *ext, + int allow_inheritance); +int X509v3_addr_validate_resource_set(STACK_OF(X509) *chain, + IPAddrBlocks *ext, int allow_inheritance); + +#endif /* OPENSSL_NO_RFC3779 */ + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_STRING, ASN1_STRING, ASN1_STRING) +#define sk_ASN1_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_value(sk, idx) ((ASN1_STRING *)OPENSSL_sk_value(ossl_check_const_ASN1_STRING_sk_type(sk), (idx))) +#define sk_ASN1_STRING_new(cmp) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new(ossl_check_ASN1_STRING_compfunc_type(cmp))) +#define sk_ASN1_STRING_new_null() ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_STRING_new_reserve(cmp, n) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_STRING_compfunc_type(cmp), (n))) +#define sk_ASN1_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_STRING_sk_type(sk), (n)) +#define sk_ASN1_STRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_delete(sk, i) ((ASN1_STRING *)OPENSSL_sk_delete(ossl_check_ASN1_STRING_sk_type(sk), (i))) +#define sk_ASN1_STRING_delete_ptr(sk, ptr) ((ASN1_STRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr))) +#define sk_ASN1_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_pop(sk) ((ASN1_STRING *)OPENSSL_sk_pop(ossl_check_ASN1_STRING_sk_type(sk))) +#define sk_ASN1_STRING_shift(sk) ((ASN1_STRING *)OPENSSL_sk_shift(ossl_check_ASN1_STRING_sk_type(sk))) +#define sk_ASN1_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_STRING_sk_type(sk),ossl_check_ASN1_STRING_freefunc_type(freefunc)) +#define sk_ASN1_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr), (idx)) +#define sk_ASN1_STRING_set(sk, idx, ptr) ((ASN1_STRING *)OPENSSL_sk_set(ossl_check_ASN1_STRING_sk_type(sk), (idx), ossl_check_ASN1_STRING_type(ptr))) +#define sk_ASN1_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr), pnum) +#define sk_ASN1_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_STRING_sk_type(sk)) +#define 
sk_ASN1_STRING_dup(sk) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_STRING_sk_type(sk))) +#define sk_ASN1_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_copyfunc_type(copyfunc), ossl_check_ASN1_STRING_freefunc_type(freefunc))) +#define sk_ASN1_STRING_set_cmp_func(sk, cmp) ((sk_ASN1_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_compfunc_type(cmp))) + + +/* + * Admission Syntax + */ +typedef struct NamingAuthority_st NAMING_AUTHORITY; +typedef struct ProfessionInfo_st PROFESSION_INFO; +typedef struct Admissions_st ADMISSIONS; +typedef struct AdmissionSyntax_st ADMISSION_SYNTAX; +DECLARE_ASN1_FUNCTIONS(NAMING_AUTHORITY) +DECLARE_ASN1_FUNCTIONS(PROFESSION_INFO) +DECLARE_ASN1_FUNCTIONS(ADMISSIONS) +DECLARE_ASN1_FUNCTIONS(ADMISSION_SYNTAX) +SKM_DEFINE_STACK_OF_INTERNAL(PROFESSION_INFO, PROFESSION_INFO, PROFESSION_INFO) +#define sk_PROFESSION_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_value(sk, idx) ((PROFESSION_INFO *)OPENSSL_sk_value(ossl_check_const_PROFESSION_INFO_sk_type(sk), (idx))) +#define sk_PROFESSION_INFO_new(cmp) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new(ossl_check_PROFESSION_INFO_compfunc_type(cmp))) +#define sk_PROFESSION_INFO_new_null() ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new_null()) +#define sk_PROFESSION_INFO_new_reserve(cmp, n) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PROFESSION_INFO_compfunc_type(cmp), (n))) +#define sk_PROFESSION_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PROFESSION_INFO_sk_type(sk), (n)) +#define sk_PROFESSION_INFO_free(sk) OPENSSL_sk_free(ossl_check_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_delete(sk, i) ((PROFESSION_INFO *)OPENSSL_sk_delete(ossl_check_PROFESSION_INFO_sk_type(sk), (i))) +#define sk_PROFESSION_INFO_delete_ptr(sk, ptr) ((PROFESSION_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr))) +#define sk_PROFESSION_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_pop(sk) ((PROFESSION_INFO *)OPENSSL_sk_pop(ossl_check_PROFESSION_INFO_sk_type(sk))) +#define sk_PROFESSION_INFO_shift(sk) ((PROFESSION_INFO *)OPENSSL_sk_shift(ossl_check_PROFESSION_INFO_sk_type(sk))) +#define sk_PROFESSION_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PROFESSION_INFO_sk_type(sk),ossl_check_PROFESSION_INFO_freefunc_type(freefunc)) +#define sk_PROFESSION_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr), (idx)) +#define sk_PROFESSION_INFO_set(sk, idx, ptr) ((PROFESSION_INFO *)OPENSSL_sk_set(ossl_check_PROFESSION_INFO_sk_type(sk), (idx), ossl_check_PROFESSION_INFO_type(ptr))) +#define sk_PROFESSION_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_find_all(sk, ptr, pnum) 
OPENSSL_sk_find_all(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr), pnum) +#define sk_PROFESSION_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_dup(sk) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_dup(ossl_check_const_PROFESSION_INFO_sk_type(sk))) +#define sk_PROFESSION_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_copyfunc_type(copyfunc), ossl_check_PROFESSION_INFO_freefunc_type(freefunc))) +#define sk_PROFESSION_INFO_set_cmp_func(sk, cmp) ((sk_PROFESSION_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(ADMISSIONS, ADMISSIONS, ADMISSIONS) +#define sk_ADMISSIONS_num(sk) OPENSSL_sk_num(ossl_check_const_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_value(sk, idx) ((ADMISSIONS *)OPENSSL_sk_value(ossl_check_const_ADMISSIONS_sk_type(sk), (idx))) +#define sk_ADMISSIONS_new(cmp) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new(ossl_check_ADMISSIONS_compfunc_type(cmp))) +#define sk_ADMISSIONS_new_null() ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new_null()) +#define sk_ADMISSIONS_new_reserve(cmp, n) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new_reserve(ossl_check_ADMISSIONS_compfunc_type(cmp), (n))) +#define sk_ADMISSIONS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ADMISSIONS_sk_type(sk), (n)) +#define sk_ADMISSIONS_free(sk) OPENSSL_sk_free(ossl_check_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_zero(sk) OPENSSL_sk_zero(ossl_check_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_delete(sk, i) ((ADMISSIONS *)OPENSSL_sk_delete(ossl_check_ADMISSIONS_sk_type(sk), (i))) +#define sk_ADMISSIONS_delete_ptr(sk, ptr) ((ADMISSIONS *)OPENSSL_sk_delete_ptr(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr))) +#define sk_ADMISSIONS_push(sk, ptr) OPENSSL_sk_push(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_pop(sk) ((ADMISSIONS *)OPENSSL_sk_pop(ossl_check_ADMISSIONS_sk_type(sk))) +#define sk_ADMISSIONS_shift(sk) ((ADMISSIONS *)OPENSSL_sk_shift(ossl_check_ADMISSIONS_sk_type(sk))) +#define sk_ADMISSIONS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ADMISSIONS_sk_type(sk),ossl_check_ADMISSIONS_freefunc_type(freefunc)) +#define sk_ADMISSIONS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr), (idx)) +#define sk_ADMISSIONS_set(sk, idx, ptr) ((ADMISSIONS *)OPENSSL_sk_set(ossl_check_ADMISSIONS_sk_type(sk), (idx), ossl_check_ADMISSIONS_type(ptr))) +#define sk_ADMISSIONS_find(sk, ptr) OPENSSL_sk_find(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr), pnum) +#define sk_ADMISSIONS_sort(sk) OPENSSL_sk_sort(ossl_check_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_dup(sk) ((STACK_OF(ADMISSIONS) 
*)OPENSSL_sk_dup(ossl_check_const_ADMISSIONS_sk_type(sk))) +#define sk_ADMISSIONS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_deep_copy(ossl_check_const_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_copyfunc_type(copyfunc), ossl_check_ADMISSIONS_freefunc_type(freefunc))) +#define sk_ADMISSIONS_set_cmp_func(sk, cmp) ((sk_ADMISSIONS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_compfunc_type(cmp))) + +typedef STACK_OF(PROFESSION_INFO) PROFESSION_INFOS; + +const ASN1_OBJECT *NAMING_AUTHORITY_get0_authorityId( + const NAMING_AUTHORITY *n); +const ASN1_IA5STRING *NAMING_AUTHORITY_get0_authorityURL( + const NAMING_AUTHORITY *n); +const ASN1_STRING *NAMING_AUTHORITY_get0_authorityText( + const NAMING_AUTHORITY *n); +void NAMING_AUTHORITY_set0_authorityId(NAMING_AUTHORITY *n, + ASN1_OBJECT* namingAuthorityId); +void NAMING_AUTHORITY_set0_authorityURL(NAMING_AUTHORITY *n, + ASN1_IA5STRING* namingAuthorityUrl); +void NAMING_AUTHORITY_set0_authorityText(NAMING_AUTHORITY *n, + ASN1_STRING* namingAuthorityText); + +const GENERAL_NAME *ADMISSION_SYNTAX_get0_admissionAuthority( + const ADMISSION_SYNTAX *as); +void ADMISSION_SYNTAX_set0_admissionAuthority( + ADMISSION_SYNTAX *as, GENERAL_NAME *aa); +const STACK_OF(ADMISSIONS) *ADMISSION_SYNTAX_get0_contentsOfAdmissions( + const ADMISSION_SYNTAX *as); +void ADMISSION_SYNTAX_set0_contentsOfAdmissions( + ADMISSION_SYNTAX *as, STACK_OF(ADMISSIONS) *a); +const GENERAL_NAME *ADMISSIONS_get0_admissionAuthority(const ADMISSIONS *a); +void ADMISSIONS_set0_admissionAuthority(ADMISSIONS *a, GENERAL_NAME *aa); +const NAMING_AUTHORITY *ADMISSIONS_get0_namingAuthority(const ADMISSIONS *a); +void ADMISSIONS_set0_namingAuthority(ADMISSIONS *a, NAMING_AUTHORITY *na); +const PROFESSION_INFOS *ADMISSIONS_get0_professionInfos(const ADMISSIONS *a); +void ADMISSIONS_set0_professionInfos(ADMISSIONS *a, PROFESSION_INFOS *pi); +const ASN1_OCTET_STRING *PROFESSION_INFO_get0_addProfessionInfo( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_addProfessionInfo( + PROFESSION_INFO *pi, ASN1_OCTET_STRING *aos); +const NAMING_AUTHORITY *PROFESSION_INFO_get0_namingAuthority( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_namingAuthority( + PROFESSION_INFO *pi, NAMING_AUTHORITY *na); +const STACK_OF(ASN1_STRING) *PROFESSION_INFO_get0_professionItems( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_professionItems( + PROFESSION_INFO *pi, STACK_OF(ASN1_STRING) *as); +const STACK_OF(ASN1_OBJECT) *PROFESSION_INFO_get0_professionOIDs( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_professionOIDs( + PROFESSION_INFO *pi, STACK_OF(ASN1_OBJECT) *po); +const ASN1_PRINTABLESTRING *PROFESSION_INFO_get0_registrationNumber( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_registrationNumber( + PROFESSION_INFO *pi, ASN1_PRINTABLESTRING *rn); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_digests.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_digests.h new file mode 100644 index 00000000000..b184807c80c --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_digests.h @@ -0,0 +1,160 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_digests.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). 
You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * sigAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 3 } + */ +#define DER_OID_V_sigAlgs DER_P_OBJECT, 8, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03 +#define DER_OID_SZ_sigAlgs 10 +extern const unsigned char ossl_der_oid_sigAlgs[DER_OID_SZ_sigAlgs]; + +/* + * id-sha1 OBJECT IDENTIFIER ::= { iso(1) + * identified-organization(3) oiw(14) + * secsig(3) algorithms(2) 26 } + */ +#define DER_OID_V_id_sha1 DER_P_OBJECT, 5, 0x2B, 0x0E, 0x03, 0x02, 0x1A +#define DER_OID_SZ_id_sha1 7 +extern const unsigned char ossl_der_oid_id_sha1[DER_OID_SZ_id_sha1]; + +/* + * id-md2 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 2 } + */ +#define DER_OID_V_id_md2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x02 +#define DER_OID_SZ_id_md2 10 +extern const unsigned char ossl_der_oid_id_md2[DER_OID_SZ_id_md2]; + +/* + * id-md5 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 5 } + */ +#define DER_OID_V_id_md5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05 +#define DER_OID_SZ_id_md5 10 +extern const unsigned char ossl_der_oid_id_md5[DER_OID_SZ_id_md5]; + +/* + * id-sha256 OBJECT IDENTIFIER ::= { hashAlgs 1 } + */ +#define DER_OID_V_id_sha256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01 +#define DER_OID_SZ_id_sha256 11 +extern const unsigned char ossl_der_oid_id_sha256[DER_OID_SZ_id_sha256]; + +/* + * id-sha384 OBJECT IDENTIFIER ::= { hashAlgs 2 } + */ +#define DER_OID_V_id_sha384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02 +#define DER_OID_SZ_id_sha384 11 +extern const unsigned char ossl_der_oid_id_sha384[DER_OID_SZ_id_sha384]; + +/* + * id-sha512 OBJECT IDENTIFIER ::= { hashAlgs 3 } + */ +#define DER_OID_V_id_sha512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03 +#define DER_OID_SZ_id_sha512 11 +extern const unsigned char ossl_der_oid_id_sha512[DER_OID_SZ_id_sha512]; + +/* + * id-sha224 OBJECT IDENTIFIER ::= { hashAlgs 4 } + */ +#define DER_OID_V_id_sha224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04 +#define DER_OID_SZ_id_sha224 11 +extern const unsigned char ossl_der_oid_id_sha224[DER_OID_SZ_id_sha224]; + +/* + * id-sha512-224 OBJECT IDENTIFIER ::= { hashAlgs 5 } + */ +#define DER_OID_V_id_sha512_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x05 +#define DER_OID_SZ_id_sha512_224 11 +extern const unsigned char ossl_der_oid_id_sha512_224[DER_OID_SZ_id_sha512_224]; + +/* + * id-sha512-256 OBJECT IDENTIFIER ::= { hashAlgs 6 } + */ +#define DER_OID_V_id_sha512_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x06 +#define DER_OID_SZ_id_sha512_256 11 +extern const unsigned char ossl_der_oid_id_sha512_256[DER_OID_SZ_id_sha512_256]; + +/* + * id-sha3-224 OBJECT IDENTIFIER ::= { hashAlgs 7 } + */ +#define DER_OID_V_id_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x07 +#define DER_OID_SZ_id_sha3_224 11 +extern const unsigned char ossl_der_oid_id_sha3_224[DER_OID_SZ_id_sha3_224]; + +/* + * id-sha3-256 OBJECT IDENTIFIER ::= { hashAlgs 8 } + */ +#define DER_OID_V_id_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x08 +#define 
DER_OID_SZ_id_sha3_256 11 +extern const unsigned char ossl_der_oid_id_sha3_256[DER_OID_SZ_id_sha3_256]; + +/* + * id-sha3-384 OBJECT IDENTIFIER ::= { hashAlgs 9 } + */ +#define DER_OID_V_id_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x09 +#define DER_OID_SZ_id_sha3_384 11 +extern const unsigned char ossl_der_oid_id_sha3_384[DER_OID_SZ_id_sha3_384]; + +/* + * id-sha3-512 OBJECT IDENTIFIER ::= { hashAlgs 10 } + */ +#define DER_OID_V_id_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0A +#define DER_OID_SZ_id_sha3_512 11 +extern const unsigned char ossl_der_oid_id_sha3_512[DER_OID_SZ_id_sha3_512]; + +/* + * id-shake128 OBJECT IDENTIFIER ::= { hashAlgs 11 } + */ +#define DER_OID_V_id_shake128 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0B +#define DER_OID_SZ_id_shake128 11 +extern const unsigned char ossl_der_oid_id_shake128[DER_OID_SZ_id_shake128]; + +/* + * id-shake256 OBJECT IDENTIFIER ::= { hashAlgs 12 } + */ +#define DER_OID_V_id_shake256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0C +#define DER_OID_SZ_id_shake256 11 +extern const unsigned char ossl_der_oid_id_shake256[DER_OID_SZ_id_shake256]; + +/* + * id-shake128-len OBJECT IDENTIFIER ::= { hashAlgs 17 } + */ +#define DER_OID_V_id_shake128_len DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x11 +#define DER_OID_SZ_id_shake128_len 11 +extern const unsigned char ossl_der_oid_id_shake128_len[DER_OID_SZ_id_shake128_len]; + +/* + * id-shake256-len OBJECT IDENTIFIER ::= { hashAlgs 18 } + */ +#define DER_OID_V_id_shake256_len DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x12 +#define DER_OID_SZ_id_shake256_len 11 +extern const unsigned char ossl_der_oid_id_shake256_len[DER_OID_SZ_id_shake256_len]; + +/* + * id-KMACWithSHAKE128 OBJECT IDENTIFIER ::={hashAlgs 19} + */ +#define DER_OID_V_id_KMACWithSHAKE128 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x13 +#define DER_OID_SZ_id_KMACWithSHAKE128 11 +extern const unsigned char ossl_der_oid_id_KMACWithSHAKE128[DER_OID_SZ_id_KMACWithSHAKE128]; + +/* + * id-KMACWithSHAKE256 OBJECT IDENTIFIER ::={ hashAlgs 20} + */ +#define DER_OID_V_id_KMACWithSHAKE256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x14 +#define DER_OID_SZ_id_KMACWithSHAKE256 11 +extern const unsigned char ossl_der_oid_id_KMACWithSHAKE256[DER_OID_SZ_id_KMACWithSHAKE256]; + diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_dsa.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_dsa.h new file mode 100644 index 00000000000..b12a56282b2 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_dsa.h @@ -0,0 +1,94 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_dsa.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * id-dsa OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57(10040) x9algorithm(4) 1 } + */ +#define DER_OID_V_id_dsa DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x38, 0x04, 0x01 +#define DER_OID_SZ_id_dsa 9 +extern const unsigned char ossl_der_oid_id_dsa[DER_OID_SZ_id_dsa]; + +/* + * id-dsa-with-sha1 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57 (10040) x9algorithm(4) 3 } + */ +#define DER_OID_V_id_dsa_with_sha1 DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x38, 0x04, 0x03 +#define DER_OID_SZ_id_dsa_with_sha1 9 +extern const unsigned char ossl_der_oid_id_dsa_with_sha1[DER_OID_SZ_id_dsa_with_sha1]; + +/* + * id-dsa-with-sha224 OBJECT IDENTIFIER ::= { sigAlgs 1 } + */ +#define DER_OID_V_id_dsa_with_sha224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x01 +#define DER_OID_SZ_id_dsa_with_sha224 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha224[DER_OID_SZ_id_dsa_with_sha224]; + +/* + * id-dsa-with-sha256 OBJECT IDENTIFIER ::= { sigAlgs 2 } + */ +#define DER_OID_V_id_dsa_with_sha256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x02 +#define DER_OID_SZ_id_dsa_with_sha256 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha256[DER_OID_SZ_id_dsa_with_sha256]; + +/* + * id-dsa-with-sha384 OBJECT IDENTIFIER ::= { sigAlgs 3 } + */ +#define DER_OID_V_id_dsa_with_sha384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x03 +#define DER_OID_SZ_id_dsa_with_sha384 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha384[DER_OID_SZ_id_dsa_with_sha384]; + +/* + * id-dsa-with-sha512 OBJECT IDENTIFIER ::= { sigAlgs 4 } + */ +#define DER_OID_V_id_dsa_with_sha512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x04 +#define DER_OID_SZ_id_dsa_with_sha512 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha512[DER_OID_SZ_id_dsa_with_sha512]; + +/* + * id-dsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 5 } + */ +#define DER_OID_V_id_dsa_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x05 +#define DER_OID_SZ_id_dsa_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_224[DER_OID_SZ_id_dsa_with_sha3_224]; + +/* + * id-dsa-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 6 } + */ +#define DER_OID_V_id_dsa_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x06 +#define DER_OID_SZ_id_dsa_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_256[DER_OID_SZ_id_dsa_with_sha3_256]; + +/* + * id-dsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 7 } + */ +#define DER_OID_V_id_dsa_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x07 +#define DER_OID_SZ_id_dsa_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_384[DER_OID_SZ_id_dsa_with_sha3_384]; + +/* + * id-dsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 8 } + */ +#define DER_OID_V_id_dsa_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x08 +#define DER_OID_SZ_id_dsa_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_512[DER_OID_SZ_id_dsa_with_sha3_512]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_DSA(WPACKET *pkt, int tag, DSA *dsa); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_DSA_with_MD(WPACKET 
*pkt, int tag, + DSA *dsa, int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ec.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ec.h new file mode 100644 index 00000000000..dd697771f71 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ec.h @@ -0,0 +1,286 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ec.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/ec.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * ecdsa-with-SHA1 OBJECT IDENTIFIER ::= { id-ecSigType 1 } + */ +#define DER_OID_V_ecdsa_with_SHA1 DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x01 +#define DER_OID_SZ_ecdsa_with_SHA1 9 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA1[DER_OID_SZ_ecdsa_with_SHA1]; + +/* + * id-ecPublicKey OBJECT IDENTIFIER ::= { id-publicKeyType 1 } + */ +#define DER_OID_V_id_ecPublicKey DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01 +#define DER_OID_SZ_id_ecPublicKey 9 +extern const unsigned char ossl_der_oid_id_ecPublicKey[DER_OID_SZ_id_ecPublicKey]; + +/* + * c2pnb163v1 OBJECT IDENTIFIER ::= { c-TwoCurve 1 } + */ +#define DER_OID_V_c2pnb163v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x01 +#define DER_OID_SZ_c2pnb163v1 10 +extern const unsigned char ossl_der_oid_c2pnb163v1[DER_OID_SZ_c2pnb163v1]; + +/* + * c2pnb163v2 OBJECT IDENTIFIER ::= { c-TwoCurve 2 } + */ +#define DER_OID_V_c2pnb163v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x02 +#define DER_OID_SZ_c2pnb163v2 10 +extern const unsigned char ossl_der_oid_c2pnb163v2[DER_OID_SZ_c2pnb163v2]; + +/* + * c2pnb163v3 OBJECT IDENTIFIER ::= { c-TwoCurve 3 } + */ +#define DER_OID_V_c2pnb163v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x03 +#define DER_OID_SZ_c2pnb163v3 10 +extern const unsigned char ossl_der_oid_c2pnb163v3[DER_OID_SZ_c2pnb163v3]; + +/* + * c2pnb176w1 OBJECT IDENTIFIER ::= { c-TwoCurve 4 } + */ +#define DER_OID_V_c2pnb176w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x04 +#define DER_OID_SZ_c2pnb176w1 10 +extern const unsigned char ossl_der_oid_c2pnb176w1[DER_OID_SZ_c2pnb176w1]; + +/* + * c2tnb191v1 OBJECT IDENTIFIER ::= { c-TwoCurve 5 } + */ +#define DER_OID_V_c2tnb191v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x05 +#define DER_OID_SZ_c2tnb191v1 10 +extern const unsigned char ossl_der_oid_c2tnb191v1[DER_OID_SZ_c2tnb191v1]; + +/* + * c2tnb191v2 OBJECT IDENTIFIER ::= { c-TwoCurve 6 } + */ +#define DER_OID_V_c2tnb191v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x06 +#define DER_OID_SZ_c2tnb191v2 10 +extern const unsigned char ossl_der_oid_c2tnb191v2[DER_OID_SZ_c2tnb191v2]; + +/* + * c2tnb191v3 OBJECT IDENTIFIER ::= { c-TwoCurve 7 } + */ +#define DER_OID_V_c2tnb191v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x07 +#define DER_OID_SZ_c2tnb191v3 10 +extern const unsigned char ossl_der_oid_c2tnb191v3[DER_OID_SZ_c2tnb191v3]; + +/* + * c2onb191v4 OBJECT IDENTIFIER ::= { c-TwoCurve 8 } + */ +#define DER_OID_V_c2onb191v4 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x08 +#define DER_OID_SZ_c2onb191v4 10 +extern const unsigned char 
ossl_der_oid_c2onb191v4[DER_OID_SZ_c2onb191v4]; + +/* + * c2onb191v5 OBJECT IDENTIFIER ::= { c-TwoCurve 9 } + */ +#define DER_OID_V_c2onb191v5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x09 +#define DER_OID_SZ_c2onb191v5 10 +extern const unsigned char ossl_der_oid_c2onb191v5[DER_OID_SZ_c2onb191v5]; + +/* + * c2pnb208w1 OBJECT IDENTIFIER ::= { c-TwoCurve 10 } + */ +#define DER_OID_V_c2pnb208w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0A +#define DER_OID_SZ_c2pnb208w1 10 +extern const unsigned char ossl_der_oid_c2pnb208w1[DER_OID_SZ_c2pnb208w1]; + +/* + * c2tnb239v1 OBJECT IDENTIFIER ::= { c-TwoCurve 11 } + */ +#define DER_OID_V_c2tnb239v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0B +#define DER_OID_SZ_c2tnb239v1 10 +extern const unsigned char ossl_der_oid_c2tnb239v1[DER_OID_SZ_c2tnb239v1]; + +/* + * c2tnb239v2 OBJECT IDENTIFIER ::= { c-TwoCurve 12 } + */ +#define DER_OID_V_c2tnb239v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0C +#define DER_OID_SZ_c2tnb239v2 10 +extern const unsigned char ossl_der_oid_c2tnb239v2[DER_OID_SZ_c2tnb239v2]; + +/* + * c2tnb239v3 OBJECT IDENTIFIER ::= { c-TwoCurve 13 } + */ +#define DER_OID_V_c2tnb239v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0D +#define DER_OID_SZ_c2tnb239v3 10 +extern const unsigned char ossl_der_oid_c2tnb239v3[DER_OID_SZ_c2tnb239v3]; + +/* + * c2onb239v4 OBJECT IDENTIFIER ::= { c-TwoCurve 14 } + */ +#define DER_OID_V_c2onb239v4 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0E +#define DER_OID_SZ_c2onb239v4 10 +extern const unsigned char ossl_der_oid_c2onb239v4[DER_OID_SZ_c2onb239v4]; + +/* + * c2onb239v5 OBJECT IDENTIFIER ::= { c-TwoCurve 15 } + */ +#define DER_OID_V_c2onb239v5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0F +#define DER_OID_SZ_c2onb239v5 10 +extern const unsigned char ossl_der_oid_c2onb239v5[DER_OID_SZ_c2onb239v5]; + +/* + * c2pnb272w1 OBJECT IDENTIFIER ::= { c-TwoCurve 16 } + */ +#define DER_OID_V_c2pnb272w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x10 +#define DER_OID_SZ_c2pnb272w1 10 +extern const unsigned char ossl_der_oid_c2pnb272w1[DER_OID_SZ_c2pnb272w1]; + +/* + * c2pnb304w1 OBJECT IDENTIFIER ::= { c-TwoCurve 17 } + */ +#define DER_OID_V_c2pnb304w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x11 +#define DER_OID_SZ_c2pnb304w1 10 +extern const unsigned char ossl_der_oid_c2pnb304w1[DER_OID_SZ_c2pnb304w1]; + +/* + * c2tnb359v1 OBJECT IDENTIFIER ::= { c-TwoCurve 18 } + */ +#define DER_OID_V_c2tnb359v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x12 +#define DER_OID_SZ_c2tnb359v1 10 +extern const unsigned char ossl_der_oid_c2tnb359v1[DER_OID_SZ_c2tnb359v1]; + +/* + * c2pnb368w1 OBJECT IDENTIFIER ::= { c-TwoCurve 19 } + */ +#define DER_OID_V_c2pnb368w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x13 +#define DER_OID_SZ_c2pnb368w1 10 +extern const unsigned char ossl_der_oid_c2pnb368w1[DER_OID_SZ_c2pnb368w1]; + +/* + * c2tnb431r1 OBJECT IDENTIFIER ::= { c-TwoCurve 20 } + */ +#define DER_OID_V_c2tnb431r1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x14 +#define DER_OID_SZ_c2tnb431r1 10 +extern const unsigned char ossl_der_oid_c2tnb431r1[DER_OID_SZ_c2tnb431r1]; + +/* + * prime192v1 OBJECT IDENTIFIER ::= { primeCurve 1 } + */ +#define DER_OID_V_prime192v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x01 +#define DER_OID_SZ_prime192v1 10 +extern const unsigned char 
ossl_der_oid_prime192v1[DER_OID_SZ_prime192v1]; + +/* + * prime192v2 OBJECT IDENTIFIER ::= { primeCurve 2 } + */ +#define DER_OID_V_prime192v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x02 +#define DER_OID_SZ_prime192v2 10 +extern const unsigned char ossl_der_oid_prime192v2[DER_OID_SZ_prime192v2]; + +/* + * prime192v3 OBJECT IDENTIFIER ::= { primeCurve 3 } + */ +#define DER_OID_V_prime192v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x03 +#define DER_OID_SZ_prime192v3 10 +extern const unsigned char ossl_der_oid_prime192v3[DER_OID_SZ_prime192v3]; + +/* + * prime239v1 OBJECT IDENTIFIER ::= { primeCurve 4 } + */ +#define DER_OID_V_prime239v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x04 +#define DER_OID_SZ_prime239v1 10 +extern const unsigned char ossl_der_oid_prime239v1[DER_OID_SZ_prime239v1]; + +/* + * prime239v2 OBJECT IDENTIFIER ::= { primeCurve 5 } + */ +#define DER_OID_V_prime239v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x05 +#define DER_OID_SZ_prime239v2 10 +extern const unsigned char ossl_der_oid_prime239v2[DER_OID_SZ_prime239v2]; + +/* + * prime239v3 OBJECT IDENTIFIER ::= { primeCurve 6 } + */ +#define DER_OID_V_prime239v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x06 +#define DER_OID_SZ_prime239v3 10 +extern const unsigned char ossl_der_oid_prime239v3[DER_OID_SZ_prime239v3]; + +/* + * prime256v1 OBJECT IDENTIFIER ::= { primeCurve 7 } + */ +#define DER_OID_V_prime256v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07 +#define DER_OID_SZ_prime256v1 10 +extern const unsigned char ossl_der_oid_prime256v1[DER_OID_SZ_prime256v1]; + +/* + * ecdsa-with-SHA224 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 1 } + */ +#define DER_OID_V_ecdsa_with_SHA224 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01 +#define DER_OID_SZ_ecdsa_with_SHA224 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA224[DER_OID_SZ_ecdsa_with_SHA224]; + +/* + * ecdsa-with-SHA256 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 2 } + */ +#define DER_OID_V_ecdsa_with_SHA256 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02 +#define DER_OID_SZ_ecdsa_with_SHA256 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA256[DER_OID_SZ_ecdsa_with_SHA256]; + +/* + * ecdsa-with-SHA384 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 3 } + */ +#define DER_OID_V_ecdsa_with_SHA384 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x03 +#define DER_OID_SZ_ecdsa_with_SHA384 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA384[DER_OID_SZ_ecdsa_with_SHA384]; + +/* + * ecdsa-with-SHA512 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 4 } + */ +#define DER_OID_V_ecdsa_with_SHA512 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x04 +#define DER_OID_SZ_ecdsa_with_SHA512 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA512[DER_OID_SZ_ecdsa_with_SHA512]; + +/* + * id-ecdsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 9 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x09 +#define DER_OID_SZ_id_ecdsa_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_224[DER_OID_SZ_id_ecdsa_with_sha3_224]; + +/* + * id-ecdsa-with-sha3-256 OBJECT 
IDENTIFIER ::= { sigAlgs 10 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0A +#define DER_OID_SZ_id_ecdsa_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_256[DER_OID_SZ_id_ecdsa_with_sha3_256]; + +/* + * id-ecdsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 11 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0B +#define DER_OID_SZ_id_ecdsa_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_384[DER_OID_SZ_id_ecdsa_with_sha3_384]; + +/* + * id-ecdsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 12 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0C +#define DER_OID_SZ_id_ecdsa_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_512[DER_OID_SZ_id_ecdsa_with_sha3_512]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_EC(WPACKET *pkt, int cont, EC_KEY *ec); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_ECDSA_with_MD(WPACKET *pkt, int cont, + EC_KEY *ec, int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ecx.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ecx.h new file mode 100644 index 00000000000..fc85738055b --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ecx.h @@ -0,0 +1,50 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ecx.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" +#include "crypto/ecx.h" + +/* Well known OIDs precompiled */ + +/* + * id-X25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 110 } + */ +#define DER_OID_V_id_X25519 DER_P_OBJECT, 3, 0x2B, 0x65, 0x6E +#define DER_OID_SZ_id_X25519 5 +extern const unsigned char ossl_der_oid_id_X25519[DER_OID_SZ_id_X25519]; + +/* + * id-X448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 111 } + */ +#define DER_OID_V_id_X448 DER_P_OBJECT, 3, 0x2B, 0x65, 0x6F +#define DER_OID_SZ_id_X448 5 +extern const unsigned char ossl_der_oid_id_X448[DER_OID_SZ_id_X448]; + +/* + * id-Ed25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 112 } + */ +#define DER_OID_V_id_Ed25519 DER_P_OBJECT, 3, 0x2B, 0x65, 0x70 +#define DER_OID_SZ_id_Ed25519 5 +extern const unsigned char ossl_der_oid_id_Ed25519[DER_OID_SZ_id_Ed25519]; + +/* + * id-Ed448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 113 } + */ +#define DER_OID_V_id_Ed448 DER_P_OBJECT, 3, 0x2B, 0x65, 0x71 +#define DER_OID_SZ_id_Ed448 5 +extern const unsigned char ossl_der_oid_id_Ed448[DER_OID_SZ_id_Ed448]; + + +int ossl_DER_w_algorithmIdentifier_ED25519(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_ED448(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_X25519(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_X448(WPACKET *pkt, int cont, ECX_KEY *ec); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_rsa.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_rsa.h new file mode 100644 index 00000000000..5ec3c515a1b --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_rsa.h @@ -0,0 +1,187 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_rsa.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/rsa.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * hashAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 2 } + */ +#define DER_OID_V_hashAlgs DER_P_OBJECT, 8, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02 +#define DER_OID_SZ_hashAlgs 10 +extern const unsigned char ossl_der_oid_hashAlgs[DER_OID_SZ_hashAlgs]; + +/* + * rsaEncryption OBJECT IDENTIFIER ::= { pkcs-1 1 } + */ +#define DER_OID_V_rsaEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01 +#define DER_OID_SZ_rsaEncryption 11 +extern const unsigned char ossl_der_oid_rsaEncryption[DER_OID_SZ_rsaEncryption]; + +/* + * id-RSAES-OAEP OBJECT IDENTIFIER ::= { pkcs-1 7 } + */ +#define DER_OID_V_id_RSAES_OAEP DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x07 +#define DER_OID_SZ_id_RSAES_OAEP 11 +extern const unsigned char ossl_der_oid_id_RSAES_OAEP[DER_OID_SZ_id_RSAES_OAEP]; + +/* + * id-pSpecified OBJECT IDENTIFIER ::= { pkcs-1 9 } + */ +#define DER_OID_V_id_pSpecified DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x09 +#define DER_OID_SZ_id_pSpecified 11 +extern const unsigned char ossl_der_oid_id_pSpecified[DER_OID_SZ_id_pSpecified]; + +/* + * id-RSASSA-PSS OBJECT IDENTIFIER ::= { pkcs-1 10 } + */ +#define DER_OID_V_id_RSASSA_PSS DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0A +#define DER_OID_SZ_id_RSASSA_PSS 11 +extern const unsigned char ossl_der_oid_id_RSASSA_PSS[DER_OID_SZ_id_RSASSA_PSS]; + +/* + * md2WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 2 } + */ +#define DER_OID_V_md2WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x02 +#define DER_OID_SZ_md2WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md2WithRSAEncryption[DER_OID_SZ_md2WithRSAEncryption]; + +/* + * md5WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 4 } + */ +#define DER_OID_V_md5WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x04 +#define DER_OID_SZ_md5WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md5WithRSAEncryption[DER_OID_SZ_md5WithRSAEncryption]; + +/* + * sha1WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 5 } + */ +#define DER_OID_V_sha1WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05 +#define DER_OID_SZ_sha1WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha1WithRSAEncryption[DER_OID_SZ_sha1WithRSAEncryption]; + +/* + * sha224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 14 } + */ +#define DER_OID_V_sha224WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E +#define DER_OID_SZ_sha224WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha224WithRSAEncryption[DER_OID_SZ_sha224WithRSAEncryption]; + +/* + * sha256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 11 } + */ +#define DER_OID_V_sha256WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B +#define DER_OID_SZ_sha256WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha256WithRSAEncryption[DER_OID_SZ_sha256WithRSAEncryption]; + +/* + * sha384WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 12 } + */ +#define DER_OID_V_sha384WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0C +#define DER_OID_SZ_sha384WithRSAEncryption 11 +extern const unsigned char 
ossl_der_oid_sha384WithRSAEncryption[DER_OID_SZ_sha384WithRSAEncryption]; + +/* + * sha512WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 13 } + */ +#define DER_OID_V_sha512WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0D +#define DER_OID_SZ_sha512WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512WithRSAEncryption[DER_OID_SZ_sha512WithRSAEncryption]; + +/* + * sha512-224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 15 } + */ +#define DER_OID_V_sha512_224WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0F +#define DER_OID_SZ_sha512_224WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512_224WithRSAEncryption[DER_OID_SZ_sha512_224WithRSAEncryption]; + +/* + * sha512-256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 16 } + */ +#define DER_OID_V_sha512_256WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x10 +#define DER_OID_SZ_sha512_256WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512_256WithRSAEncryption[DER_OID_SZ_sha512_256WithRSAEncryption]; + +/* + * id-mgf1 OBJECT IDENTIFIER ::= { pkcs-1 8 } + */ +#define DER_OID_V_id_mgf1 DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x08 +#define DER_OID_SZ_id_mgf1 11 +extern const unsigned char ossl_der_oid_id_mgf1[DER_OID_SZ_id_mgf1]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 13 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0D +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_224[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_224]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 14 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0E +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_256[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_256]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 15 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0F +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_384[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_384]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 16 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x10 +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_512[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_512]; + +/* + * md4WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 3 } + */ +#define DER_OID_V_md4WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x03 +#define DER_OID_SZ_md4WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md4WithRSAEncryption[DER_OID_SZ_md4WithRSAEncryption]; + +/* + * ripemd160WithRSAEncryption OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) teletrust(36) algorithm(3) signatureAlgorithm(3) rsaSignature(1) 2 + * } + */ +#define DER_OID_V_ripemd160WithRSAEncryption DER_P_OBJECT, 6, 0x2B, 0x24, 0x03, 0x03, 0x01, 0x02 
+#define DER_OID_SZ_ripemd160WithRSAEncryption 8 +extern const unsigned char ossl_der_oid_ripemd160WithRSAEncryption[DER_OID_SZ_ripemd160WithRSAEncryption]; + +/* + * mdc2WithRSASignature OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) oiw(14) secsig(3) algorithms(2) mdc2WithRSASignature(14) + * } + */ +#define DER_OID_V_mdc2WithRSASignature DER_P_OBJECT, 5, 0x2B, 0x0E, 0x03, 0x02, 0x0E +#define DER_OID_SZ_mdc2WithRSASignature 7 +extern const unsigned char ossl_der_oid_mdc2WithRSASignature[DER_OID_SZ_mdc2WithRSASignature]; + + +/* PSS parameters */ +int ossl_DER_w_RSASSA_PSS_params(WPACKET *pkt, int tag, + const RSA_PSS_PARAMS_30 *pss); +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_RSA(WPACKET *pkt, int tag, RSA *rsa); +int ossl_DER_w_algorithmIdentifier_RSA_PSS(WPACKET *pkt, int tag, + int rsa_type, + const RSA_PSS_PARAMS_30 *pss); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_MDWithRSAEncryption(WPACKET *pkt, int tag, + int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_sm2.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_sm2.h new file mode 100644 index 00000000000..9d41b31265c --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_sm2.h @@ -0,0 +1,37 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_sm2.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/ec.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * sm2-with-SM3 OBJECT IDENTIFIER ::= { sm-scheme 501 } + */ +#define DER_OID_V_sm2_with_SM3 DER_P_OBJECT, 8, 0x2A, 0x81, 0x1C, 0xCF, 0x55, 0x01, 0x83, 0x75 +#define DER_OID_SZ_sm2_with_SM3 10 +extern const unsigned char ossl_der_oid_sm2_with_SM3[DER_OID_SZ_sm2_with_SM3]; + +/* + * curveSM2 OBJECT IDENTIFIER ::= { sm-scheme 301 } + */ +#define DER_OID_V_curveSM2 DER_P_OBJECT, 8, 0x2A, 0x81, 0x1C, 0xCF, 0x55, 0x01, 0x82, 0x2D +#define DER_OID_SZ_curveSM2 10 +extern const unsigned char ossl_der_oid_curveSM2[DER_OID_SZ_curveSM2]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_SM2(WPACKET *pkt, int cont, EC_KEY *ec); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_SM2_with_MD(WPACKET *pkt, int cont, + EC_KEY *ec, int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_wrap.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_wrap.h new file mode 100644 index 00000000000..ff295403772 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_wrap.h @@ -0,0 +1,46 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_wrap.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * id-alg-CMS3DESwrap OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-9(9) smime(16) alg(3) 6 + * } + */ +#define DER_OID_V_id_alg_CMS3DESwrap DER_P_OBJECT, 11, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x10, 0x03, 0x06 +#define DER_OID_SZ_id_alg_CMS3DESwrap 13 +extern const unsigned char ossl_der_oid_id_alg_CMS3DESwrap[DER_OID_SZ_id_alg_CMS3DESwrap]; + +/* + * id-aes128-wrap OBJECT IDENTIFIER ::= { aes 5 } + */ +#define DER_OID_V_id_aes128_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x05 +#define DER_OID_SZ_id_aes128_wrap 11 +extern const unsigned char ossl_der_oid_id_aes128_wrap[DER_OID_SZ_id_aes128_wrap]; + +/* + * id-aes192-wrap OBJECT IDENTIFIER ::= { aes 25 } + */ +#define DER_OID_V_id_aes192_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x19 +#define DER_OID_SZ_id_aes192_wrap 11 +extern const unsigned char ossl_der_oid_id_aes192_wrap[DER_OID_SZ_id_aes192_wrap]; + +/* + * id-aes256-wrap OBJECT IDENTIFIER ::= { aes 45 } + */ +#define DER_OID_V_id_aes256_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x2D +#define DER_OID_SZ_id_aes256_wrap 11 +extern const unsigned char ossl_der_oid_id_aes256_wrap[DER_OID_SZ_id_aes256_wrap]; + diff --git a/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h b/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h new file mode 100644 index 00000000000..a3a6485bbe9 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h @@ -0,0 +1,30 @@ +/* + * WARNING: do not edit! + * Generated by util/mkbuildinf.pl + * + * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#define PLATFORM "platform: linux64-loongarch64" +#define DATE "built on: Thu Apr 18 07:53:56 2024 UTC" + +/* + * Generate compiler_flags as an array of individual characters. This is a + * workaround for the situation where CFLAGS gets too long for a C90 string + * literal + */ +static const char compiler_flags[] = { + 'c','o','m','p','i','l','e','r',':',' ','g','c','c',' ','-','f', + 'P','I','C',' ','-','p','t','h','r','e','a','d',' ','-','W','a', + 'l','l',' ','-','O','3',' ','-','D','O','P','E','N','S','S','L', + '_','U','S','E','_','N','O','D','E','L','E','T','E',' ','-','D', + 'L','_','E','N','D','I','A','N',' ','-','D','O','P','E','N','S', + 'S','L','_','P','I','C',' ','-','D','O','P','E','N','S','S','L', + '_','B','U','I','L','D','I','N','G','_','O','P','E','N','S','S', + 'L',' ','-','D','N','D','E','B','U','G','\0' +}; diff --git a/contrib/openssl-cmake/linux_loongarch64/params_idx.c b/contrib/openssl-cmake/linux_loongarch64/params_idx.c new file mode 100644 index 00000000000..9e453c26fdc --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/params_idx.c @@ -0,0 +1,2710 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from crypto/params_idx.c.in + * + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). 
You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#include "internal/e_os.h" +#include "internal/param_names.h" +#include + +/* Machine generated TRIE -- generated by util/perl/OpenSSL/paramnames.pm */ +int ossl_param_find_pidx(const char *s) +{ + switch(s[0]) { + default: + break; + case 'a': + switch(s[1]) { + default: + break; + case 'c': + if (strcmp("vp-info", s + 2) == 0) + return PIDX_KDF_PARAM_X942_ACVPINFO; + break; + case 'd': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_ARGON2_AD; + } + break; + case 'e': + if (strcmp("ad", s + 2) == 0) + return PIDX_CIPHER_PARAM_AEAD; + break; + case 'l': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case '_': + if (strcmp("id_param", s + 4) == 0) + return PIDX_CIPHER_PARAM_ALGORITHM_ID_PARAMS; + break; + case 'i': + if (strcmp("d-absent", s + 4) == 0) + return PIDX_DIGEST_PARAM_ALGID_ABSENT; + break; + case 'o': + if (strcmp("rithm-id", s + 4) == 0) + return PIDX_SIGNATURE_PARAM_ALGORITHM_ID; + } + break; + case 'i': + if (strcmp("as", s + 3) == 0) + return PIDX_STORE_PARAM_ALIAS; + } + break; + case '\0': + return PIDX_PKEY_PARAM_EC_A; + } + break; + case 'b': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("sis-type", s + 2) == 0) + return PIDX_PKEY_PARAM_EC_CHAR2_TYPE; + break; + case 'i': + if (strcmp("ts", s + 2) == 0) + return PIDX_PKEY_PARAM_BITS; + break; + case 'l': + switch(s[2]) { + default: + break; + case 'o': + switch(s[3]) { + default: + break; + case 'c': + switch(s[4]) { + default: + break; + case 'k': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("size", s + 6) == 0) + return PIDX_MAC_PARAM_BLOCK_SIZE; + break; + case '_': + if (strcmp("padding", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING; + break; + case 's': + if (strcmp("ize", s + 6) == 0) + return PIDX_CIPHER_PARAM_BLOCK_SIZE; + } + } + } + } + break; + case 'u': + if (strcmp("ildinfo", s + 2) == 0) + return PIDX_PROV_PARAM_BUILDINFO; + break; + case '\0': + return PIDX_PKEY_PARAM_EC_B; + } + break; + case 'c': + switch(s[1]) { + default: + break; + case '-': + if (strcmp("rounds", s + 2) == 0) + return PIDX_MAC_PARAM_C_ROUNDS; + break; + case 'e': + if (strcmp("kalg", s + 2) == 0) + return PIDX_KDF_PARAM_CEK_ALG; + break; + case 'i': + if (strcmp("pher", s + 2) == 0) + return PIDX_ALG_PARAM_CIPHER; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'f': + if (strcmp("actor", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_COFACTOR; + break; + case 'n': + switch(s[3]) { + default: + break; + case 's': + if (strcmp("tant", s + 4) == 0) + return PIDX_KDF_PARAM_CONSTANT; + break; + case 't': + if (strcmp("ext-string", s + 4) == 0) + return PIDX_SIGNATURE_PARAM_CONTEXT_STRING; + } + } + break; + case 't': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case '_': + if (strcmp("mode", s + 4) == 0) + return PIDX_CIPHER_PARAM_CTS_MODE; + break; + case '\0': + return PIDX_CIPHER_PARAM_CTS; + } + } + break; + case 'u': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'o': + switch(s[5]) { + default: + break; + case 'm': + switch(s[6]) { + default: + break; + case '-': + if (strcmp("iv", s + 7) == 0) + return PIDX_CIPHER_PARAM_CUSTOM_IV; + break; + case '\0': + return 
PIDX_MAC_PARAM_CUSTOM; + } + } + } + } + } + } + break; + case 'd': + switch(s[1]) { + default: + break; + case '-': + if (strcmp("rounds", s + 2) == 0) + return PIDX_MAC_PARAM_D_ROUNDS; + break; + case 'a': + switch(s[2]) { + default: + break; + case 't': + switch(s[3]) { + default: + break; + case 'a': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 's': + if (strcmp("tructure", s + 6) == 0) + return PIDX_OBJECT_PARAM_DATA_STRUCTURE; + break; + case 't': + if (strcmp("ype", s + 6) == 0) + return PIDX_OBJECT_PARAM_DATA_TYPE; + } + break; + case '\0': + return PIDX_KDF_PARAM_DATA; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("oded-from-explicit", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS; + break; + case 'f': + if (strcmp("ault-digest", s + 3) == 0) + return PIDX_PKEY_PARAM_DEFAULT_DIGEST; + break; + case 's': + if (strcmp("c", s + 3) == 0) + return PIDX_OBJECT_PARAM_DESC; + } + break; + case 'h': + if (strcmp("kem-ikm", s + 2) == 0) + return PIDX_PKEY_PARAM_DHKEM_IKM; + break; + case 'i': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 's': + switch(s[5]) { + default: + break; + case 't': + switch(s[6]) { + default: + break; + case '-': + switch(s[7]) { + default: + break; + case 'n': + if (strcmp("oinit", s + 8) == 0) + return PIDX_MAC_PARAM_DIGEST_NOINIT; + break; + case 'o': + if (strcmp("neshot", s + 8) == 0) + return PIDX_MAC_PARAM_DIGEST_ONESHOT; + break; + case 'p': + if (strcmp("rops", s + 8) == 0) + return PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS; + break; + case 's': + if (strcmp("ize", s + 8) == 0) + return PIDX_PKEY_PARAM_DIGEST_SIZE; + } + break; + case '\0': + return PIDX_STORE_PARAM_DIGEST; + } + } + } + } + break; + case 's': + if (strcmp("tid", s + 3) == 0) + return PIDX_PKEY_PARAM_DIST_ID; + } + break; + case 'r': + if (strcmp("bg-no-trunc-md", s + 2) == 0) + return PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST; + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_D; + } + break; + case 'e': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("rly_clean", s + 2) == 0) + return PIDX_KDF_PARAM_EARLY_CLEAN; + break; + case 'c': + if (strcmp("dh-cofactor-mode", s + 2) == 0) + return PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE; + break; + case 'n': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'o': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'e': + if (strcmp("d-pub-key", s + 6) == 0) + return PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY; + break; + case 'i': + if (strcmp("ng", s + 6) == 0) + return PIDX_PKEY_PARAM_EC_ENCODING; + } + } + break; + case 'r': + if (strcmp("ypt-level", s + 4) == 0) + return PIDX_ENCODER_PARAM_ENCRYPT_LEVEL; + } + break; + case 'g': + if (strcmp("ine", s + 3) == 0) + return PIDX_ALG_PARAM_ENGINE; + break; + case 't': + switch(s[3]) { + default: + break; + case 'r': + switch(s[4]) { + default: + break; + case 'o': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 'y': + switch(s[7]) { + default: + break; + case '_': + if (strcmp("required", s + 8) == 0) + return PIDX_DRBG_PARAM_ENTROPY_REQUIRED; + break; + case '\0': + return PIDX_KDF_PARAM_HMACDRBG_ENTROPY; + } + } + } + } + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_E; + break; + case 'x': + if (strcmp("pect", s + 2) == 0) + return 
PIDX_STORE_PARAM_EXPECT; + } + break; + case 'f': + switch(s[1]) { + default: + break; + case 'i': + switch(s[2]) { + default: + break; + case 'e': + if (strcmp("ld-type", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_FIELD_TYPE; + break; + case 'n': + if (strcmp("gerprint", s + 3) == 0) + return PIDX_STORE_PARAM_FINGERPRINT; + } + } + break; + case 'g': + switch(s[1]) { + default: + break; + case 'e': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'r': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case '\0': + return PIDX_RAND_PARAM_GENERATE; + } + break; + case 'o': + if (strcmp("r", s + 8) == 0) + return PIDX_PKEY_PARAM_EC_GENERATOR; + } + } + } + } + } + } + break; + case 'i': + if (strcmp("ndex", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_GINDEX; + break; + case 'r': + switch(s[2]) { + default: + break; + case 'o': + switch(s[3]) { + default: + break; + case 'u': + switch(s[4]) { + default: + break; + case 'p': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("check", s + 6) == 0) + return PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE; + break; + case '\0': + return PIDX_PKEY_PARAM_GROUP_NAME; + } + } + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_G; + } + break; + case 'h': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("s-randkey", s + 2) == 0) + return PIDX_CIPHER_PARAM_HAS_RAND_KEY; + break; + case 'i': + if (strcmp("ndex", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_H; + } + break; + case 'i': + switch(s[1]) { + default: + break; + case 'd': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_PKCS12_ID; + } + break; + case 'k': + if (strcmp("me", s + 2) == 0) + return PIDX_KEM_PARAM_IKME; + break; + case 'm': + if (strcmp("plicit-rejection", s + 2) == 0) + return PIDX_PKEY_PARAM_IMPLICIT_REJECTION; + break; + case 'n': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("lude-public", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC; + break; + case 'f': + if (strcmp("o", s + 3) == 0) + return PIDX_PASSPHRASE_PARAM_INFO; + break; + case 'p': + if (strcmp("ut-type", s + 3) == 0) + return PIDX_STORE_PARAM_INPUT_TYPE; + break; + case 's': + if (strcmp("tance", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_INSTANCE; + } + break; + case 't': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'r': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("tion", s + 5) == 0) + return PIDX_GEN_PARAM_ITERATION; + break; + case '\0': + return PIDX_KDF_PARAM_ITER; + } + } + } + break; + case 'v': + switch(s[2]) { + default: + break; + case 'l': + if (strcmp("en", s + 3) == 0) + return PIDX_CIPHER_PARAM_IVLEN; + break; + case '\0': + return PIDX_MAC_PARAM_IV; + } + } + break; + case 'j': + switch(s[1]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_COFACTOR; + } + break; + case 'k': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K2; + } + break; + case '3': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K3; + } + break; + case 'a': + if (strcmp("t", s + 2) == 0) + return PIDX_SIGNATURE_PARAM_KAT; + 
break; + case 'd': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 'g': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case 's': + switch(s[9]) { + default: + break; + case 't': + switch(s[10]) { + default: + break; + case '-': + if (strcmp("props", s + 11) == 0) + return PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS; + break; + case '\0': + return PIDX_EXCHANGE_PARAM_KDF_DIGEST; + } + } + } + } + } + } + break; + case 'o': + if (strcmp("utlen", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_OUTLEN; + break; + case 't': + if (strcmp("ype", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_TYPE; + break; + case 'u': + if (strcmp("km", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_UKM; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'y': + switch(s[3]) { + default: + break; + case 'b': + if (strcmp("its", s + 4) == 0) + return PIDX_CIPHER_PARAM_RC2_KEYBITS; + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_CIPHER_PARAM_KEYLEN; + break; + case '\0': + return PIDX_KDF_PARAM_KEY; + } + } + } + break; + case 'l': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'b': + if (strcmp("el", s + 3) == 0) + return PIDX_KDF_PARAM_LABEL; + break; + case 'n': + if (strcmp("es", s + 3) == 0) + return PIDX_KDF_PARAM_ARGON2_LANES; + } + } + break; + case 'm': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'k': + if (strcmp("ey", s + 4) == 0) + return PIDX_CIPHER_PARAM_AEAD_MAC_KEY; + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_KDF_PARAM_MAC_SIZE; + break; + case '\0': + return PIDX_ALG_PARAM_MAC; + } + break; + case 'n': + if (strcmp("datory-digest", s + 3) == 0) + return PIDX_PKEY_PARAM_MANDATORY_DIGEST; + break; + case 'x': + switch(s[3]) { + default: + break; + case '-': + if (strcmp("size", s + 4) == 0) + return PIDX_PKEY_PARAM_MAX_SIZE; + break; + case '_': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("dinlen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_ADINLEN; + break; + case 'e': + switch(s[5]) { + default: + break; + case 'a': + if (strcmp("rly_data", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA; + break; + case 'n': + if (strcmp("tropylen", s + 6) == 0) + return PIDX_DRBG_PARAM_MAX_ENTROPYLEN; + } + break; + case 'f': + if (strcmp("rag_len", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN; + break; + case 'n': + if (strcmp("oncelen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_NONCELEN; + break; + case 'p': + if (strcmp("erslen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_PERSLEN; + break; + case 'r': + if (strcmp("equest", s + 5) == 0) + return PIDX_RAND_PARAM_MAX_REQUEST; + } + break; + case 'i': + if (strcmp("um_length", s + 4) == 0) + return PIDX_DRBG_PARAM_MAX_LENGTH; + break; + case 'm': + if (strcmp("em_bytes", s + 4) == 0) + return PIDX_KDF_PARAM_SCRYPT_MAXMEM; + } + } + break; + case 'e': + if (strcmp("mcost", s + 2) == 0) + return PIDX_KDF_PARAM_ARGON2_MEMCOST; + break; + case 'g': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case '1': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'd': + if (strcmp("igest", s + 6) == 0) + return 
PIDX_PKEY_PARAM_MGF1_DIGEST; + break; + case 'p': + if (strcmp("roperties", s + 6) == 0) + return PIDX_PKEY_PARAM_MGF1_PROPERTIES; + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_MASKGENFUNC; + } + } + break; + case 'i': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("alg", s + 3) == 0) + return PIDX_DIGEST_PARAM_MICALG; + break; + case 'n': + switch(s[3]) { + default: + break; + case '_': + switch(s[4]) { + default: + break; + case 'e': + if (strcmp("ntropylen", s + 5) == 0) + return PIDX_DRBG_PARAM_MIN_ENTROPYLEN; + break; + case 'n': + if (strcmp("oncelen", s + 5) == 0) + return PIDX_DRBG_PARAM_MIN_NONCELEN; + } + break; + case 'i': + if (strcmp("um_length", s + 4) == 0) + return PIDX_DRBG_PARAM_MIN_LENGTH; + } + } + break; + case 'o': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case '\0': + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE; + } + break; + case 'u': + if (strcmp("le-filename", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_MODULE_FILENAME; + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_M; + } + break; + case 'n': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("me", s + 2) == 0) + return PIDX_STORE_PARAM_ISSUER; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'c': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("type", s + 6) == 0) + return PIDX_SIGNATURE_PARAM_NONCE_TYPE; + break; + case '\0': + return PIDX_KDF_PARAM_HMACDRBG_NONCE; + } + } + } + } + break; + case 'u': + if (strcmp("m", s + 2) == 0) + return PIDX_CIPHER_PARAM_NUM; + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_N; + } + break; + case 'o': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("ep-label", s + 2) == 0) + return PIDX_ASYM_CIPHER_PARAM_OAEP_LABEL; + break; + case 'p': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'n': + if (strcmp("ssl-version", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_VERSION; + break; + case 'r': + if (strcmp("ation", s + 4) == 0) + return PIDX_KEM_PARAM_OPERATION; + } + break; + case 't': + if (strcmp("ions", s + 3) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS; + } + break; + case 'r': + if (strcmp("der", s + 2) == 0) + return PIDX_PKEY_PARAM_EC_ORDER; + } + break; + case 'p': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_P1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_P2; + } + break; + case 'a': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'm': + if (strcmp("ode", s + 5) == 0) + return PIDX_PKEY_PARAM_PAD_MODE; + break; + case 't': + if (strcmp("ype", s + 5) == 0) + return PIDX_DIGEST_PARAM_PAD_TYPE; + } + break; + case 'd': + if (strcmp("ing", s + 4) == 0) + return PIDX_CIPHER_PARAM_PADDING; + break; + case '\0': + return PIDX_EXCHANGE_PARAM_PAD; + } + break; + case 'r': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'y': + switch(s[5]) { + default: + break; + case 'u': + if (strcmp("-info", s + 6) == 0) + return PIDX_KDF_PARAM_X942_PARTYUINFO; + break; + case 'v': + if (strcmp("-info", s + 6) == 0) + return PIDX_KDF_PARAM_X942_PARTYVINFO; + } + } + } 
+ break; + case 's': + if (strcmp("s", s + 3) == 0) + return PIDX_KDF_PARAM_PASSWORD; + } + break; + case 'b': + if (strcmp("its", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_PBITS; + break; + case 'c': + if (strcmp("ounter", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_PCOUNTER; + break; + case 'k': + if (strcmp("cs5", s + 2) == 0) + return PIDX_KDF_PARAM_PKCS5; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'i': + if (strcmp("nt-format", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT; + break; + case 't': + if (strcmp("ential", s + 3) == 0) + return PIDX_GEN_PARAM_POTENTIAL; + } + break; + case 'r': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'd': + if (strcmp("iction_resistance", s + 4) == 0) + return PIDX_DRBG_PARAM_PREDICTION_RESISTANCE; + break; + case 'f': + if (strcmp("ix", s + 4) == 0) + return PIDX_KDF_PARAM_PREFIX; + } + break; + case 'i': + switch(s[3]) { + default: + break; + case 'm': + if (strcmp("es", s + 4) == 0) + return PIDX_PKEY_PARAM_RSA_PRIMES; + break; + case 'v': + switch(s[4]) { + default: + break; + case '_': + if (strcmp("len", s + 5) == 0) + return PIDX_PKEY_PARAM_DH_PRIV_LEN; + break; + case '\0': + return PIDX_PKEY_PARAM_PRIV_KEY; + } + } + break; + case 'o': + switch(s[3]) { + default: + break; + case 'p': + if (strcmp("erties", s + 4) == 0) + return PIDX_ALG_PARAM_PROPERTIES; + break; + case 'v': + if (strcmp("ider-name", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_PROV_NAME; + } + } + break; + case 'u': + if (strcmp("b", s + 2) == 0) + return PIDX_PKEY_PARAM_PUB_KEY; + break; + case '\0': + return PIDX_KDF_PARAM_SCRYPT_P; + } + break; + case 'q': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_Q1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_Q2; + } + break; + case 'b': + if (strcmp("its", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_QBITS; + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_Q; + break; + case 'x': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_PUB_X; + } + break; + case 'y': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_PUB_Y; + } + } + break; + case 'r': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case 'k': + if (strcmp("ey", s + 5) == 0) + return PIDX_CIPHER_PARAM_RANDOM_KEY; + break; + case 'o': + if (strcmp("m_data", s + 5) == 0) + return PIDX_DRBG_PARAM_RANDOM_DATA; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'a': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case '_': + switch(s[5]) { + default: + break; + case 'a': + if (strcmp("head", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD; + break; + case 'b': + if (strcmp("uffer_len", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN; + } + } + } + break; + case 'f': + if (strcmp("erence", s + 3) == 0) + return PIDX_OBJECT_PARAM_REFERENCE; + break; + case 's': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case 'd': + switch(s[6]) { + default: + break; + case '_': + switch(s[7]) { + default: + break; + case 'c': + if (strcmp("ounter", s + 8) == 0) + return 
PIDX_DRBG_PARAM_RESEED_COUNTER; + break; + case 'r': + if (strcmp("equests", s + 8) == 0) + return PIDX_DRBG_PARAM_RESEED_REQUESTS; + break; + case 't': + switch(s[8]) { + default: + break; + case 'i': + switch(s[9]) { + default: + break; + case 'm': + switch(s[10]) { + default: + break; + case 'e': + switch(s[11]) { + default: + break; + case '_': + if (strcmp("interval", s + 12) == 0) + return PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL; + break; + case '\0': + return PIDX_DRBG_PARAM_RESEED_TIME; + } + } + } + } + } + } + } + } + } + } + break; + case 'o': + if (strcmp("unds", s + 2) == 0) + return PIDX_CIPHER_PARAM_ROUNDS; + break; + case 's': + switch(s[2]) { + default: + break; + case 'a': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'c': + switch(s[5]) { + default: + break; + case 'o': + switch(s[6]) { + default: + break; + case 'e': + switch(s[7]) { + default: + break; + case 'f': + switch(s[8]) { + default: + break; + case 'f': + switch(s[9]) { + default: + break; + case 'i': + switch(s[10]) { + default: + break; + case 'c': + switch(s[11]) { + default: + break; + case 'i': + switch(s[12]) { + default: + break; + case 'e': + switch(s[13]) { + default: + break; + case 'n': + switch(s[14]) { + default: + break; + case 't': + switch(s[15]) { + default: + break; + case '1': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT1; + } + break; + case '2': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT2; + } + break; + case '3': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT3; + } + break; + case '4': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT4; + } + break; + case '5': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT5; + } + break; + case '6': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT6; + } + break; + case '7': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT7; + } + break; + case '8': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT8; + } + break; + case '9': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT; + } + } + } + } + } + } + } + } + } + } + } + break; + case 'e': + switch(s[5]) { + default: + break; + case 'x': + switch(s[6]) { + default: + break; + case 'p': + switch(s[7]) { + default: + break; + case 'o': + switch(s[8]) { + default: + break; + case 'n': + switch(s[9]) { + default: + break; + case 'e': + switch(s[10]) { + default: + break; + case 'n': + switch(s[11]) { + default: + break; + case 't': + switch(s[12]) { + default: + break; + case '1': + switch(s[13]) { + default: + break; + case '0': + switch(s[14]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT10; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT1; + } + break; + case '2': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT2; + } + break; + case '3': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT3; + } + break; + case '4': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT4; + } + break; + case '5': + switch(s[13]) { + default: + break; + case '\0': + return 
PIDX_PKEY_PARAM_RSA_EXPONENT5; + } + break; + case '6': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT6; + } + break; + case '7': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT7; + } + break; + case '8': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT8; + } + break; + case '9': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT; + } + } + } + } + } + } + } + } + break; + case 'f': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 'c': + switch(s[7]) { + default: + break; + case 't': + switch(s[8]) { + default: + break; + case 'o': + switch(s[9]) { + default: + break; + case 'r': + switch(s[10]) { + default: + break; + case '1': + switch(s[11]) { + default: + break; + case '0': + switch(s[12]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR10; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR1; + } + break; + case '2': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR2; + } + break; + case '3': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR3; + } + break; + case '4': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR4; + } + break; + case '5': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR5; + } + break; + case '6': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR6; + } + break; + case '7': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR7; + } + break; + case '8': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR8; + } + break; + case '9': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR; + } + } + } + } + } + } + } + } + } + break; + case '\0': + return PIDX_KDF_PARAM_SCRYPT_R; + } + break; + case 's': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'f': + if (strcmp("eprime-generator", s + 3) == 0) + return PIDX_PKEY_PARAM_DH_GENERATOR; + break; + case 'l': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'l': + if (strcmp("en", s + 5) == 0) + return PIDX_PKEY_PARAM_RSA_PSS_SALTLEN; + break; + case '\0': + return PIDX_KDF_PARAM_SALT; + } + } + break; + case 'v': + if (strcmp("e-parameters", s + 3) == 0) + return PIDX_ENCODER_PARAM_SAVE_PARAMETERS; + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'r': + if (strcmp("et", s + 4) == 0) + return PIDX_KDF_PARAM_SECRET; + break; + case 'u': + switch(s[4]) { + default: + break; + case 'r': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'y': + switch(s[8]) { + default: + break; + case '-': + switch(s[9]) { + default: + break; + case 'b': + if (strcmp("its", s + 10) == 0) + return PIDX_PKEY_PARAM_SECURITY_BITS; + break; + case 'c': + if (strcmp("hecks", s + 10) == 0) + return PIDX_PROV_PARAM_SECURITY_CHECKS; + } + } + } + } + } + } + } + break; + case 'e': + if (strcmp("d", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_SEED; + break; + case 
'r': + if (strcmp("ial", s + 3) == 0) + return PIDX_STORE_PARAM_SERIAL; + break; + case 's': + if (strcmp("sion_id", s + 3) == 0) + return PIDX_KDF_PARAM_SSHKDF_SESSION_ID; + } + break; + case 'i': + if (strcmp("ze", s + 2) == 0) + return PIDX_MAC_PARAM_SIZE; + break; + case 'p': + if (strcmp("eed", s + 2) == 0) + return PIDX_CIPHER_PARAM_SPEED; + break; + case 's': + if (strcmp("l3-ms", s + 2) == 0) + return PIDX_DIGEST_PARAM_SSL3_MS; + break; + case 't': + switch(s[2]) { + default: + break; + case '-': + switch(s[3]) { + default: + break; + case 'd': + if (strcmp("esc", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_DESC; + break; + case 'p': + if (strcmp("hase", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_PHASE; + break; + case 't': + if (strcmp("ype", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_TYPE; + } + break; + case 'a': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case '\0': + return PIDX_RAND_PARAM_STATE; + } + break; + case 'u': + if (strcmp("s", s + 5) == 0) + return PIDX_PROV_PARAM_STATUS; + } + } + break; + case 'r': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("m_mac", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC; + break; + case 'n': + if (strcmp("gth", s + 5) == 0) + return PIDX_RAND_PARAM_STRENGTH; + } + } + } + break; + case 'u': + switch(s[2]) { + default: + break; + case 'b': + if (strcmp("ject", s + 3) == 0) + return PIDX_STORE_PARAM_SUBJECT; + break; + case 'p': + switch(s[3]) { + default: + break; + case 'p': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 'r': + if (strcmp("ivinfo", s + 7) == 0) + return PIDX_KDF_PARAM_X942_SUPP_PRIVINFO; + break; + case 'u': + if (strcmp("binfo", s + 7) == 0) + return PIDX_KDF_PARAM_X942_SUPP_PUBINFO; + } + } + } + } + } + } + break; + case 't': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_CIPHER_PARAM_AEAD_TAGLEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_AEAD_TAG; + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case '_': + switch(s[5]) { + default: + break; + case 'e': + if (strcmp("ntropy", s + 6) == 0) + return PIDX_RAND_PARAM_TEST_ENTROPY; + break; + case 'n': + if (strcmp("once", s + 6) == 0) + return PIDX_RAND_PARAM_TEST_NONCE; + } + } + } + } + break; + case 'h': + if (strcmp("reads", s + 2) == 0) + return PIDX_KDF_PARAM_THREADS; + break; + case 'l': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'c': + if (strcmp("lient-version", s + 5) == 0) + return PIDX_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION; + break; + case 'd': + if (strcmp("ata-size", s + 5) == 0) + return PIDX_MAC_PARAM_TLS_DATA_SIZE; + break; + case 'g': + switch(s[5]) { + default: + break; + case 'r': + switch(s[6]) { + default: + break; + case 'o': + switch(s[7]) { + default: + break; + case 'u': + switch(s[8]) { + default: + break; + case 'p': + switch(s[9]) { + default: + break; + case '-': + switch(s[10]) { + default: + break; + case 'a': + if (strcmp("lg", s + 11) == 0) + return PIDX_CAPABILITY_TLS_GROUP_ALG; + break; + case 
'i': + switch(s[11]) { + default: + break; + case 'd': + switch(s[12]) { + default: + break; + case '\0': + return PIDX_CAPABILITY_TLS_GROUP_ID; + } + break; + case 's': + if (strcmp("-kem", s + 12) == 0) + return PIDX_CAPABILITY_TLS_GROUP_IS_KEM; + } + break; + case 'n': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'm': + switch(s[13]) { + default: + break; + case 'e': + switch(s[14]) { + default: + break; + case '-': + if (strcmp("internal", s + 15) == 0) + return PIDX_CAPABILITY_TLS_GROUP_NAME_INTERNAL; + break; + case '\0': + return PIDX_CAPABILITY_TLS_GROUP_NAME; + } + } + } + } + break; + case 's': + if (strcmp("ec-bits", s + 11) == 0) + return PIDX_CAPABILITY_TLS_GROUP_SECURITY_BITS; + } + } + } + } + } + } + break; + case 'm': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 'c': + switch(s[7]) { + default: + break; + case '-': + if (strcmp("size", s + 8) == 0) + return PIDX_CIPHER_PARAM_TLS_MAC_SIZE; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS_MAC; + } + break; + case 'x': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'd': + if (strcmp("tls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS; + break; + case 't': + if (strcmp("ls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS; + } + } + } + break; + case 'i': + switch(s[6]) { + default: + break; + case 'n': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'd': + if (strcmp("tls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS; + break; + case 't': + if (strcmp("ls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS; + } + } + } + break; + case 'u': + if (strcmp("lti", s + 6) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK; + } + break; + case 'n': + if (strcmp("egotiated-version", s + 5) == 0) + return PIDX_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION; + break; + case 's': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 'g': + switch(s[7]) { + default: + break; + case 'a': + switch(s[8]) { + default: + break; + case 'l': + switch(s[9]) { + default: + break; + case 'g': + switch(s[10]) { + default: + break; + case '-': + switch(s[11]) { + default: + break; + case 'c': + if (strcmp("ode-point", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_CODE_POINT; + break; + case 'h': + switch(s[12]) { + default: + break; + case 'a': + switch(s[13]) { + default: + break; + case 's': + switch(s[14]) { + default: + break; + case 'h': + switch(s[15]) { + default: + break; + case '-': + switch(s[16]) { + default: + break; + case 'n': + if (strcmp("ame", s + 17) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_HASH_NAME; + break; + case 'o': + if (strcmp("id", s + 17) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_HASH_OID; + } + } + } + } + } + break; + case 'i': + if (strcmp("ana-name", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME; + break; + case 'k': + switch(s[12]) { + default: + break; + case 'e': + switch(s[13]) { + default: + break; + case 'y': + switch(s[14]) { + default: + break; + case 't': + switch(s[15]) { + default: + break; + case 'y': + switch(s[16]) { + default: + break; + case 'p': + switch(s[17]) { + default: + break; + case 'e': + switch(s[18]) { + default: + break; + case '-': + if (strcmp("oid", s + 19) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID; + break; + case '\0': + return PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE; + } + } + } + } + } + } + } + 
break; + case 'n': + if (strcmp("ame", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_NAME; + break; + case 'o': + if (strcmp("id", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_OID; + break; + case 's': + switch(s[12]) { + default: + break; + case 'e': + if (strcmp("c-bits", s + 13) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SECURITY_BITS; + break; + case 'i': + switch(s[13]) { + default: + break; + case 'g': + switch(s[14]) { + default: + break; + case '-': + switch(s[15]) { + default: + break; + case 'n': + if (strcmp("ame", s + 16) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SIG_NAME; + break; + case 'o': + if (strcmp("id", s + 16) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SIG_OID; + } + } + } + } + } + } + } + } + } + } + } + break; + case 'v': + if (strcmp("ersion", s + 5) == 0) + return PIDX_CIPHER_PARAM_TLS_VERSION; + } + break; + case '1': + switch(s[4]) { + default: + break; + case '-': + if (strcmp("prf-ems-check", s + 5) == 0) + return PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK; + break; + case 'm': + switch(s[5]) { + default: + break; + case 'u': + switch(s[6]) { + default: + break; + case 'l': + switch(s[7]) { + default: + break; + case 't': + switch(s[8]) { + default: + break; + case 'i': + switch(s[9]) { + default: + break; + case '_': + switch(s[10]) { + default: + break; + case 'a': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'd': + switch(s[13]) { + default: + break; + case 'p': + if (strcmp("acklen", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD; + } + } + } + break; + case 'e': + switch(s[11]) { + default: + break; + case 'n': + switch(s[12]) { + default: + break; + case 'c': + switch(s[13]) { + default: + break; + case 'i': + if (strcmp("n", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN; + break; + case 'l': + if (strcmp("en", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC; + } + } + } + break; + case 'i': + if (strcmp("nterleave", s + 11) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE; + break; + case 'm': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'x': + switch(s[13]) { + default: + break; + case 'b': + if (strcmp("ufsz", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE; + break; + case 's': + if (strcmp("ndfrag", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT; + } + } + } + } + } + } + } + } + } + } + break; + case 'a': + switch(s[4]) { + default: + break; + case 'a': + switch(s[5]) { + default: + break; + case 'd': + switch(s[6]) { + default: + break; + case 'p': + if (strcmp("ad", s + 7) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_AAD_PAD; + break; + case '\0': + return PIDX_CIPHER_PARAM_AEAD_TLS1_AAD; + } + } + } + break; + case 'i': + switch(s[4]) { + default: + break; + case 'v': + switch(s[5]) { + default: + break; + case 'f': + if (strcmp("ixed", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_IV_FIXED; + break; + case 'g': + if (strcmp("en", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN; + break; + case 'i': + if (strcmp("nv", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV; + } + } + break; + case 't': + if (strcmp("ree", s + 4) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE; + } + } + break; + case 'p': + switch(s[2]) { + default: + break; + case '\0': + return 
PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS; + } + break; + case 'y': + if (strcmp("pe", s + 2) == 0) + return PIDX_OBJECT_PARAM_TYPE; + } + break; + case 'u': + switch(s[1]) { + default: + break; + case 'k': + if (strcmp("m", s + 2) == 0) + return PIDX_KDF_PARAM_UKM; + break; + case 'p': + if (strcmp("dated-iv", s + 2) == 0) + return PIDX_CIPHER_PARAM_UPDATED_IV; + break; + case 's': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'b': + if (strcmp("its", s + 5) == 0) + return PIDX_CIPHER_PARAM_USE_BITS; + break; + case 'c': + if (strcmp("ofactor-flag", s + 5) == 0) + return PIDX_PKEY_PARAM_USE_COFACTOR_FLAG; + break; + case 'k': + if (strcmp("eybits", s + 5) == 0) + return PIDX_KDF_PARAM_X942_USE_KEYBITS; + break; + case 'l': + switch(s[5]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_KBKDF_USE_L; + } + break; + case 's': + if (strcmp("eparator", s + 5) == 0) + return PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR; + } + break; + case '_': + switch(s[4]) { + default: + break; + case 'd': + if (strcmp("erivation_function", s + 5) == 0) + return PIDX_DRBG_PARAM_USE_DF; + break; + case 'e': + if (strcmp("tm", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM; + } + } + } + } + break; + case 'v': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'l': + switch(s[3]) { + default: + break; + case 'i': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case '-': + switch(s[9]) { + default: + break; + case 'g': + switch(s[10]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_VALIDATE_G; + } + break; + case 'l': + if (strcmp("egacy", s + 10) == 0) + return PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY; + break; + case 'p': + if (strcmp("q", s + 10) == 0) + return PIDX_PKEY_PARAM_FFC_VALIDATE_PQ; + } + } + } + } + } + } + } + } + break; + case 'e': + if (strcmp("rsion", s + 2) == 0) + return PIDX_PROV_PARAM_VERSION; + } + break; + case 'x': + switch(s[1]) { + default: + break; + case 'c': + if (strcmp("ghash", s + 2) == 0) + return PIDX_KDF_PARAM_SSHKDF_XCGHASH; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_DIGEST_PARAM_XOFLEN; + break; + case '\0': + return PIDX_MAC_PARAM_XOF; + } + } + break; + case 'p': + switch(s[2]) { + default: + break; + case '1': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP1; + } + break; + case '2': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP2; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP; + } + break; + case 'q': + switch(s[2]) { + default: + break; + case '1': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ1; + } + break; + case '2': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ2; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ; + } + break; + case 't': + if (strcmp("s_standard", s + 2) == 0) + return PIDX_CIPHER_PARAM_XTS_STANDARD; + } + } + return -1; +} + +/* End of TRIE */ diff --git a/contrib/rocksdb b/contrib/rocksdb index dead55e60b8..3a0b80ca9d6 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 
dead55e60b873d5f70f0e9458fbbba2b2180f430 +Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984 diff --git a/contrib/sentry-native-cmake/CMakeLists.txt b/contrib/sentry-native-cmake/CMakeLists.txt index 6364e75db28..6e4c8c36081 100644 --- a/contrib/sentry-native-cmake/CMakeLists.txt +++ b/contrib/sentry-native-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -if (NOT OS_FREEBSD AND NOT (OS_DARWIN AND COMPILER_CLANG)) +if (NOT OS_FREEBSD AND NOT OS_DARWIN) option (ENABLE_SENTRY "Enable Sentry" ${ENABLE_LIBRARIES}) else() option (ENABLE_SENTRY "Enable Sentry" OFF) diff --git a/contrib/yaml-cpp b/contrib/yaml-cpp index 0c86adac6d1..f91e9383412 160000 --- a/contrib/yaml-cpp +++ b/contrib/yaml-cpp @@ -1 +1 @@ -Subproject commit 0c86adac6d117ee2b4afcedb8ade19036ca0327d +Subproject commit f91e938341273b5f9d341380ab17bcc3de5daa06 diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 413ad2dfaed..b3271d94184 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.4.1.2088" +ARG VERSION="24.5.1.1763" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/packager/README.md b/docker/packager/README.md index 3604e8585a4..12947aed62f 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -3,10 +3,10 @@ compilers and build settings. Correctly configured Docker daemon is single depen Usage: -Build deb package with `clang-17` in `debug` mode: +Build deb package with `clang-18` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-17 --debug-build +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-18 --debug-build $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb @@ -17,11 +17,11 @@ $ ls -l deb/test_output ``` -Build ClickHouse binary with `clang-17` and `address` sanitizer in `relwithdebuginfo` +Build ClickHouse binary with `clang-18` and `address` sanitizer in `relwithdebuginfo` mode: ``` $ mkdir $HOME/some_clickhouse -$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-17 --sanitizer=address +$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-18 --sanitizer=address $ ls -l $HOME/some_clickhouse -rwxr-xr-x 1 root root 787061952 clickhouse lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse diff --git a/docker/packager/packager b/docker/packager/packager index 23fc26bc1a4..2dcbd8d695e 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -149,6 +149,7 @@ def parse_env_variables( PPC_SUFFIX = "-ppc64le" RISCV_SUFFIX = "-riscv64" S390X_SUFFIX = "-s390x" + LOONGARCH_SUFFIX = "-loongarch64" AMD64_COMPAT_SUFFIX = "-amd64-compat" AMD64_MUSL_SUFFIX = "-amd64-musl" @@ -167,6 +168,7 @@ def parse_env_variables( is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_riscv = compiler.endswith(RISCV_SUFFIX) is_cross_s390x = compiler.endswith(S390X_SUFFIX) + is_cross_loongarch = compiler.endswith(LOONGARCH_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) is_amd64_musl = compiler.endswith(AMD64_MUSL_SUFFIX) @@ -235,6 +237,11 @@ def parse_env_variables( cmake_flags.append( 
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-s390x.cmake" ) + elif is_cross_loongarch: + cc = compiler[: -len(LOONGARCH_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-loongarch64.cmake" + ) elif is_amd64_compat: cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") @@ -403,19 +410,20 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--compiler", choices=( - "clang-17", - "clang-17-darwin", - "clang-17-darwin-aarch64", - "clang-17-aarch64", - "clang-17-aarch64-v80compat", - "clang-17-ppc64le", - "clang-17-riscv64", - "clang-17-s390x", - "clang-17-amd64-compat", - "clang-17-amd64-musl", - "clang-17-freebsd", + "clang-18", + "clang-18-darwin", + "clang-18-darwin-aarch64", + "clang-18-aarch64", + "clang-18-aarch64-v80compat", + "clang-18-ppc64le", + "clang-18-riscv64", + "clang-18-s390x", + "clang-18-loongarch64", + "clang-18-amd64-compat", + "clang-18-amd64-musl", + "clang-18-freebsd", ), - default="clang-17", + default="clang-18", help="a compiler to use", ) parser.add_argument( diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5e224b16764..3f3b880c8f3 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.4.1.2088" +ARG VERSION="24.5.1.1763" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 7292163023d..5fd22ee9b51 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -1,11 +1,14 @@ FROM ubuntu:20.04 # see https://github.com/moby/moby/issues/4032#issuecomment-192327844 +# It could be removed after we move on a version 23:04+ ARG DEBIAN_FRONTEND=noninteractive # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" +# We shouldn't use `apt upgrade` to not change the upstream image. It's updated biweekly + # user/group precreated explicitly with fixed uid/gid on purpose. # It is especially important for rootless containers: in that case entrypoint # can't do chown and owners of mounted volumes should be configured externally. @@ -16,20 +19,21 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list && groupadd -r clickhouse --gid=101 \ && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ && apt-get update \ - && apt-get upgrade -yq \ && apt-get install --yes --no-install-recommends \ ca-certificates \ locales \ tzdata \ wget \ - && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.4.1.2088" +ARG VERSION="24.5.1.1763" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" +#docker-official-library:off +# The part between `docker-official-library` tags is related to our builds + # set non-empty deb_location_url url to create a docker image # from debs created by CI build, for example: # docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://..." -t ... 
@@ -80,19 +84,22 @@ RUN if [ -n "${single_binary_location_url}" ]; then \ && rm -rf /tmp/* ; \ fi +# The rest is the same in the official docker and in our build system +#docker-official-library:on + # A fallback to installation from ClickHouse repository RUN if ! clickhouse local -q "SELECT ''" > /dev/null 2>&1; then \ apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ - ca-certificates \ dirmngr \ gnupg2 \ && mkdir -p /etc/apt/sources.list.d \ && GNUPGHOME=$(mktemp -d) \ - && GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring \ + && GNUPGHOME="$GNUPGHOME" gpg --batch --no-default-keyring \ --keyring /usr/share/keyrings/clickhouse-keyring.gpg \ - --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 \ + --keyserver hkp://keyserver.ubuntu.com:80 \ + --recv-keys 3a9ea1193a97b548be1457d48919f6bd2b48d754 \ && rm -rf "$GNUPGHOME" \ && chmod +r /usr/share/keyrings/clickhouse-keyring.gpg \ && echo "${REPOSITORY}" > /etc/apt/sources.list.d/clickhouse.list \ @@ -127,7 +134,6 @@ RUN mkdir /docker-entrypoint-initdb.d COPY docker_related_config.xml /etc/clickhouse-server/config.d/ COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh EXPOSE 9000 8123 9009 VOLUME /var/lib/clickhouse diff --git a/docker/server/README.md b/docker/server/README.md index d6cf2dfdf09..65239126790 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -4,33 +4,34 @@ ClickHouse is an open-source column-oriented DBMS (columnar database management system) for online analytical processing (OLAP) that allows users to generate analytical reports using SQL queries in real-time. -ClickHouse works 100-1000x faster than traditional database management systems, and processes hundreds of millions to over a billion rows and tens of gigabytes of data per server per second. With a widespread user base around the globe, the technology has received praise for its reliability, ease of use, and fault tolerance. +ClickHouse works 100-1000x faster than traditional database management systems, and processes hundreds of millions to over a billion rows and tens of gigabytes of data per server per second. With a widespread user base around the globe, the technology has received praise for its reliability, ease of use, and fault tolerance. For more information and documentation see https://clickhouse.com/. ## Versions -- The `latest` tag points to the latest release of the latest stable branch. -- Branch tags like `22.2` point to the latest release of the corresponding branch. -- Full version tags like `22.2.3.5` point to the corresponding release. -- The tag `head` is built from the latest commit to the default branch. -- Each tag has optional `-alpine` suffix to reflect that it's built on top of `alpine`. +- The `latest` tag points to the latest release of the latest stable branch. +- Branch tags like `22.2` point to the latest release of the corresponding branch. +- Full version tags like `22.2.3.5` point to the corresponding release. +- The tag `head` is built from the latest commit to the default branch. +- Each tag has optional `-alpine` suffix to reflect that it's built on top of `alpine`. ### Compatibility -- The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3. -- The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A) and additionally the Load-Acquire RCpc register. 
The register is optional in version ARMv8.2-A and mandatory in [ARMv8.3-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.3-A). Supported in Graviton >=2, Azure and GCP instances. Examples for unsupported devices are Raspberry Pi 4 (ARMv8.0-A) and Jetson AGX Xavier/Orin (ARMv8.2-A). +- The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3. +- The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A) and additionally the Load-Acquire RCpc register. The register is optional in version ARMv8.2-A and mandatory in [ARMv8.3-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.3-A). Supported in Graviton >=2, Azure and GCP instances. Examples for unsupported devices are Raspberry Pi 4 (ARMv8.0-A) and Jetson AGX Xavier/Orin (ARMv8.2-A). ## How to use this image ### start server instance + ```bash docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server ``` By default, ClickHouse will be accessible only via the Docker network. See the [networking section below](#networking). -By default, starting above server instance will be run as the `default` user without password. +By default, starting above server instance will be run as the `default` user without password. ### connect to it from a native client @@ -66,9 +67,7 @@ docker run -d -p 18123:8123 -p19000:9000 --name some-clickhouse-server --ulimit echo 'SELECT version()' | curl 'http://localhost:18123/' --data-binary @- ``` -``` -22.6.3.35 -``` +`22.6.3.35` or by allowing the container to use [host ports directly](https://docs.docker.com/network/host/) using `--network=host` (also allows achieving better network performance): @@ -77,16 +76,14 @@ docker run -d --network=host --name some-clickhouse-server --ulimit nofile=26214 echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- ``` -``` -22.6.3.35 -``` +`22.6.3.35` ### Volumes Typically you may want to mount the following folders inside your container to achieve persistency: -* `/var/lib/clickhouse/` - main folder where ClickHouse stores the data -* `/var/log/clickhouse-server/` - logs +- `/var/lib/clickhouse/` - main folder where ClickHouse stores the data +- `/var/log/clickhouse-server/` - logs ```bash docker run -d \ @@ -97,9 +94,9 @@ docker run -d \ You may also want to mount: -* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustments -* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments -* `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). +- `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustments +- `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments +- `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). ### Linux capabilities @@ -150,7 +147,7 @@ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLIC ## How to extend this image -To perform additional initialization in an image derived from this one, add one or more `*.sql`, `*.sql.gz`, or `*.sh` scripts under `/docker-entrypoint-initdb.d`. After the entrypoint calls `initdb`, it will run any `*.sql` files, run any executable `*.sh` scripts, and source any non-executable `*.sh` scripts found in that directory to do further initialization before starting the service. 
+To perform additional initialization in an image derived from this one, add one or more `*.sql`, `*.sql.gz`, or `*.sh` scripts under `/docker-entrypoint-initdb.d`. After the entrypoint calls `initdb`, it will run any `*.sql` files, run any executable `*.sh` scripts, and source any non-executable `*.sh` scripts found in that directory to do further initialization before starting the service. Also, you can provide environment variables `CLICKHOUSE_USER` & `CLICKHOUSE_PASSWORD` that will be used for clickhouse-client during initialization. For example, to add an additional user and database, add the following to `/docker-entrypoint-initdb.d/init-db.sh`: diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 8858e12c50e..0c869a95db2 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -127,9 +127,6 @@ function setup_logs_replication echo 'Create all configured system logs' clickhouse-client --query "SYSTEM FLUSH LOGS" - # It's doesn't make sense to try creating tables if SYNC fails - echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client "${CONNECTION_ARGS[@]}" || return 0 - debug_or_sanitizer_build=$(clickhouse-client -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%'") echo "Build is debug or sanitizer: $debug_or_sanitizer_build" @@ -143,7 +140,7 @@ function setup_logs_replication time DateTime COMMENT 'The time of test run', test_name String COMMENT 'The name of the test', coverage Array(UInt64) COMMENT 'An array of addresses of the code (a subset of addresses instrumented for coverage) that were encountered during the test run' - ) ENGINE = Null COMMENT 'Contains information about per-test coverage from the CI, but used only for exporting to the CI cluster' + ) ENGINE = MergeTree ORDER BY test_name COMMENT 'Contains information about per-test coverage from the CI, but used only for exporting to the CI cluster' " # For each system log table: diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 912ff191e57..e0be261d5e8 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -1,5 +1,4 @@ -# rebuild in #33610 -# docker build -t clickhouse/fasttest . +# docker build -t clickhouse/fasttest . 
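Returning to the `docker/server/README.md` hunk above: the `init-db.sh` body that the paragraph refers to lies outside the lines shown in this diff. A minimal sketch of such a script, assuming placeholder database and user names (nothing here ships with the image):

```bash
#!/bin/bash
set -e

# Run by the entrypoint during initialization, before the service starts accepting outside connections.
# "analytics", "app" and the password are placeholders for illustration only.
clickhouse client -n <<EOSQL
    CREATE DATABASE IF NOT EXISTS analytics;
    CREATE USER IF NOT EXISTS app IDENTIFIED WITH sha256_password BY 'change_me';
    GRANT SELECT, INSERT ON analytics.* TO app;
EOSQL
```

As the README notes, if `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` are set on the container they are passed to clickhouse-client during this phase, so the script itself does not need to embed credentials.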
ARG FROM_TAG=latest FROM clickhouse/test-util:$FROM_TAG diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index bc7ffd1c2ef..4d5159cfa9e 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -160,10 +160,17 @@ function clone_submodules git submodule sync git submodule init - # --jobs does not work as fast as real parallel running - printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \ - xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \ - git submodule update --depth 1 --single-branch + + # Network is unreliable + for _ in {1..10} + do + # --jobs does not work as fast as real parallel running + printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \ + xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \ + git submodule update --depth 1 --single-branch && break + sleep 1 + done + git submodule foreach git reset --hard git submodule foreach git checkout @ -f git submodule foreach git clean -xfd diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index c31d2fd7f39..e2a4976b385 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -31,6 +31,16 @@ + + + + + + + + + + diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 76661a5b51c..b8f967ed9c2 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -17,7 +17,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-17_debug_none_unsplitted_disable_False_binary"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-18_debug_none_unsplitted_disable_False_binary"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function git_clone_with_retry @@ -138,7 +138,7 @@ function filter_exists_and_template # but it doesn't allow to use regex echo "$path" | sed 's/\.sql\.j2$/.gen.sql/' else - echo "'$path' does not exists" >&2 + echo "'$path' does not exist" >&2 fi done } diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 8297a7100d1..23d8a37d822 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -101,7 +101,8 @@ RUN python3 -m pip install --no-cache-dir \ retry==0.9.2 \ bs4==0.0.2 \ lxml==5.1.0 \ - urllib3==2.0.7 + urllib3==2.0.7 \ + jwcrypto==1.5.6 # bs4, lxml are for cloud tests, do not delete # Hudi supports only spark 3.3.*, not 3.4 diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index 576a0f0ef8e..444f3cd0de7 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-17_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-18_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 09198ca1968..0d3372b43be 100644 --- 
a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-17_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-18_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/sqltest/run.sh b/docker/test/sqltest/run.sh index 1d939805c7b..7edc1341d7d 100755 --- a/docker/test/sqltest/run.sh +++ b/docker/test/sqltest/run.sh @@ -6,7 +6,7 @@ set -e set -u set -o pipefail -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-17_debug_none_unsplitted_disable_False_binary"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-18_debug_none_unsplitted_disable_False_binary"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function wget_with_retry diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index f02a628d13e..c3d80a7334b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -89,8 +89,8 @@ ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 -RUN npm install -g azurite \ - && npm install -g tslib +RUN npm install -g azurite@3.30.0 \ + && npm install -g tslib && npm install -g node COPY run.sh / COPY setup_minio.sh / diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index c756ce4669d..2b9433edd20 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -83,7 +83,7 @@ setup_minio() { ./mc alias set clickminio http://localhost:11111 clickhouse clickhouse ./mc admin user add clickminio test testtest ./mc admin policy set clickminio readwrite user=test - ./mc mb clickminio/test + ./mc mb --ignore-existing clickminio/test if [ "$test_type" = "stateless" ]; then ./mc policy set public clickminio/test fi diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 23f942a00a2..3b6ad244c82 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -78,7 +78,8 @@ function configure() randomize_config_boolean_value use_compression zookeeper fi - randomize_config_boolean_value allow_experimental_block_number_column block_number + randomize_config_boolean_value enable_block_number_column block_number + randomize_config_boolean_value enable_block_offset_column block_number # for clickhouse-server (via service) echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment @@ -119,13 +120,46 @@ EOL local max_users_mem max_users_mem=$((total_mem*30/100)) # 30% + # Similar to docker/test/fuzzer/query-fuzzer-tweaks-users.xml echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G" - cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < /etc/clickhouse-server/users.d/stress_test_tweaks-users.xml < + 60 10G ${max_users_mem} + + 200 + + + + 60 + + + + 10G + + + + 200 + + + + + + + + + + + + + + + + + diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 5d53d03606f..cb29185f068 
100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -11,10 +11,10 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ aspell \ curl \ git \ + gh \ file \ libxml2-utils \ moreutils \ - python3-fuzzywuzzy \ python3-pip \ yamllint \ locales \ @@ -22,8 +22,18 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* # python-magic is the same version as in Ubuntu 22.04 -RUN pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \ - python-magic==0.4.24 requests types-requests \ +RUN pip3 install \ + PyGithub \ + black==23.12.0 \ + boto3 \ + codespell==2.2.1 \ + mypy==1.8.0 \ + pylint==3.1.0 \ + python-magic==0.4.24 \ + requests \ + thefuzz \ + types-requests \ + unidiff \ && rm -rf /root/.cache/pip RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 6761ddba3e5..1f2cc9903b2 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -58,47 +58,29 @@ echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/sys # Install previous release packages install_packages previous_release_package_folder -# Save old settings from system table for settings changes check -clickhouse-local -q "select * from system.settings format Native" > old_settings.native +# NOTE: we need to run clickhouse-local under script to get settings without any adjustments, like clickhouse-local does in case of stdout is not a tty +function save_settings_clean() +{ + local out=$1 && shift + script -q -c "clickhouse-local -q \"select * from system.settings into outfile '$out'\"" --log-out /dev/null +} + +# We save the (numeric) version of the old server to compare setting changes between the 2 +# We do this since we are testing against the latest release, not taking into account release candidates, so we might +# be testing current master (24.6) against the latest stable release (24.4) +function save_major_version() +{ + local out=$1 && shift + clickhouse-local -q "SELECT a[1]::UInt64 * 100 + a[2]::UInt64 as v FROM (Select splitByChar('.', version()) as a) into outfile '$out'" +} + +save_settings_clean 'old_settings.native' +save_major_version 'old_version.native' # Initial run without S3 to create system.*_log on local file system to make it # available for dump via clickhouse-local configure -function remove_keeper_config() -{ - sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml -} - -# async_replication setting doesn't exist on some older versions -remove_keeper_config "async_replication" "1" - -# create_if_not_exists feature flag doesn't exist on some older versions -remove_keeper_config "create_if_not_exists" "[01]" - -#todo: remove these after 24.3 released. -sudo sed -i "s|azure<|azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml - -#todo: remove these after 24.3 released. 
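A side note on the two helpers added above: `save_settings_clean` runs `clickhouse-local` under `script` so the settings dump is not affected by the adjustments clickhouse-local applies when stdout is not a tty, and `save_major_version` packs `major.minor` into a single integer (`major * 100 + minor`) so the later settings-changes check can compare releases numerically. A quick, hand-run sanity check of that encoding (not part of the test script; the version string is the one used elsewhere in this diff):

```bash
# 24.4 encodes to 2404 and 24.6 to 2406, so "new > old" keeps only the
# settings_changes entries introduced after the old release.
clickhouse-local -q "
    SELECT splitByChar('.', '24.4.1.2088') AS a,
           a[1]::UInt64 * 100 + a[2]::UInt64 AS encoded
"
```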
-sudo sed -i "s|local<|local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml - -# latest_logs_cache_size_threshold setting doesn't exist on some older versions -remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" - -# commit_logs_cache_size_threshold setting doesn't exist on some older versions -remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" - -# it contains some new settings, but we can safely remove it -rm /etc/clickhouse-server/config.d/merge_tree.xml -rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml -rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml -rm /etc/clickhouse-server/config.d/storage_conf_02963.xml -rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml -rm /etc/clickhouse-server/config.d/handlers.yaml -rm /etc/clickhouse-server/users.d/nonconst_timezone.xml -rm /etc/clickhouse-server/users.d/s3_cache_new.xml -rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml - start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log @@ -110,44 +92,11 @@ export USE_S3_STORAGE_FOR_MERGE_TREE=1 export ZOOKEEPER_FAULT_INJECTION=0 configure -# force_sync=false doesn't work correctly on some older versions -sudo sed -i "s|false|true|" /etc/clickhouse-server/config.d/keeper_port.xml - -#todo: remove these after 24.3 released. -sudo sed -i "s|azure<|azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml - -#todo: remove these after 24.3 released. -sudo sed -i "s|local<|local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml - -# async_replication setting doesn't exist on some older versions -remove_keeper_config "async_replication" "1" - -# create_if_not_exists feature flag doesn't exist on some older versions -remove_keeper_config "create_if_not_exists" "[01]" - -# latest_logs_cache_size_threshold setting doesn't exist on some older versions -remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" - -# commit_logs_cache_size_threshold setting doesn't exist on some older versions -remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" - # But we still need default disk because some tables loaded only into it sudo sed -i "s|
s3|s3
default|" /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -# it contains some new settings, but we can safely remove it -rm /etc/clickhouse-server/config.d/merge_tree.xml -rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml -rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml -rm /etc/clickhouse-server/config.d/storage_conf_02963.xml -rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml -rm /etc/clickhouse-server/config.d/handlers.yaml -rm /etc/clickhouse-server/config.d/block_number.xml -rm /etc/clickhouse-server/users.d/nonconst_timezone.xml -rm /etc/clickhouse-server/users.d/s3_cache_new.xml -rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml - start clickhouse-client --query="SELECT 'Server version: ', version()" @@ -183,9 +132,10 @@ configure IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'") if [ "${IS_SANITIZED}" -eq "0" ] then - clickhouse-local -q "select * from system.settings format Native" > new_settings.native + save_settings_clean 'new_settings.native' clickhouse-local -nmq " CREATE TABLE old_settings AS file('old_settings.native'); + CREATE TABLE old_version AS file('old_version.native'); CREATE TABLE new_settings AS file('new_settings.native'); SELECT @@ -196,8 +146,11 @@ then LEFT JOIN old_settings ON new_settings.name = old_settings.name WHERE (new_settings.value != old_settings.value) AND (name NOT IN ( SELECT arrayJoin(tupleElement(changes, 'name')) - FROM system.settings_changes - WHERE version = extract(version(), '^(?:\\d+\\.\\d+)') + FROM + ( + SELECT *, splitByChar('.', version) AS version_array FROM system.settings_changes + ) + WHERE (version_array[1]::UInt64 * 100 + version_array[2]::UInt64) > (SELECT v FROM old_version LIMIT 1) )) SETTINGS join_use_nulls = 1 INTO OUTFILE 'changed_settings.txt' @@ -210,8 +163,11 @@ then FROM old_settings )) AND (name NOT IN ( SELECT arrayJoin(tupleElement(changes, 'name')) - FROM system.settings_changes - WHERE version = extract(version(), '^(?:\\d+\\.\\d+)') + FROM + ( + SELECT *, splitByChar('.', version) AS version_array FROM system.settings_changes + ) + WHERE (version_array[1]::UInt64 * 100 + version_array[2]::UInt64) > (SELECT v FROM old_version LIMIT 1) )) INTO OUTFILE 'new_settings.txt' FORMAT PrettyCompactNoEscapes; diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 5446adf3793..f13bb576f79 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -5,7 +5,15 @@ FROM ubuntu:22.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=17 +# FIXME: rebuild for clang 18.1.3, that contains a workaround [1] for +# sanitizers issue [2]: +# +# $ git tag --contains c2a57034eff048cd36c563c8e0051db3a70991b3 | tail -1 +# llvmorg-18.1.3 +# +# [1]: https://github.com/llvm/llvm-project/commit/c2a57034eff048cd36c563c8e0051db3a70991b3 +# [2]: https://github.com/ClickHouse/ClickHouse/issues/64086 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18 RUN apt-get update \ && apt-get install \ diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md index fc912aba3e1..f4525d872df 100644 --- 
a/docs/_description_templates/template-setting.md +++ b/docs/_description_templates/template-setting.md @@ -2,7 +2,7 @@ Description. -For the switch setting, use the typical phrase: “Enables or disables something …”. +For the switch setting, use the typical phrase: “Enables or disables something ...”. Possible values: diff --git a/docs/_includes/install/deb_repo.sh b/docs/_includes/install/deb_repo.sh deleted file mode 100644 index 21106e9fc47..00000000000 --- a/docs/_includes/install/deb_repo.sh +++ /dev/null @@ -1,11 +0,0 @@ -sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 - -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ - /etc/apt/sources.list.d/clickhouse.list -sudo apt-get update - -sudo apt-get install -y clickhouse-server clickhouse-client - -sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/rpm_repo.sh b/docs/_includes/install/rpm_repo.sh deleted file mode 100644 index e3fd1232047..00000000000 --- a/docs/_includes/install/rpm_repo.sh +++ /dev/null @@ -1,7 +0,0 @@ -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client - -sudo /etc/init.d/clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/tgz_repo.sh b/docs/_includes/install/tgz_repo.sh deleted file mode 100644 index 0994510755b..00000000000 --- a/docs/_includes/install/tgz_repo.sh +++ /dev/null @@ -1,19 +0,0 @@ -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ - grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh diff --git a/docs/changelogs/v20.7.1.4310-prestable.md b/docs/changelogs/v20.7.1.4310-prestable.md index f47c7334228..aa1d993b263 100644 --- a/docs/changelogs/v20.7.1.4310-prestable.md +++ b/docs/changelogs/v20.7.1.4310-prestable.md @@ -166,4 +166,4 @@ * NO CL ENTRY: 'Revert "Abort on std::out_of_range in debug builds"'. [#12752](https://github.com/ClickHouse/ClickHouse/pull/12752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Bump protobuf from 3.12.2 to 3.12.4 in /docs/tools'. [#13102](https://github.com/ClickHouse/ClickHouse/pull/13102) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). * NO CL ENTRY: 'Merge [#12574](https://github.com/ClickHouse/ClickHouse/issues/12574)'. 
[#13158](https://github.com/ClickHouse/ClickHouse/pull/13158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer…"'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer..."'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/docs/changelogs/v21.12.1.9017-prestable.md b/docs/changelogs/v21.12.1.9017-prestable.md index 88b8260e312..bd84873e67a 100644 --- a/docs/changelogs/v21.12.1.9017-prestable.md +++ b/docs/changelogs/v21.12.1.9017-prestable.md @@ -421,5 +421,5 @@ sidebar_label: 2022 * Fix possible crash in DataTypeAggregateFunction [#32287](https://github.com/ClickHouse/ClickHouse/pull/32287) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Update backport.py [#32323](https://github.com/ClickHouse/ClickHouse/pull/32323) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix graphite-bench build [#32351](https://github.com/ClickHouse/ClickHouse/pull/32351) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Revert "graphite: split tagged/plain rollup rules (for merges perfoma… [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "graphite: split tagged/plain rollup rules (for merges perfoma... [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Another attempt to fix unit test Executor::RemoveTasksStress [#32390](https://github.com/ClickHouse/ClickHouse/pull/32390) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). diff --git a/docs/changelogs/v21.3.3.14-lts.md b/docs/changelogs/v21.3.3.14-lts.md index 57bde602f21..91d99deaa6b 100644 --- a/docs/changelogs/v21.3.3.14-lts.md +++ b/docs/changelogs/v21.3.3.14-lts.md @@ -18,4 +18,4 @@ sidebar_label: 2022 #### NOT FOR CHANGELOG / INSIGNIFICANT -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). diff --git a/docs/changelogs/v21.4.1.6422-prestable.md b/docs/changelogs/v21.4.1.6422-prestable.md index 2eadb0d4754..66937c3be15 100644 --- a/docs/changelogs/v21.4.1.6422-prestable.md +++ b/docs/changelogs/v21.4.1.6422-prestable.md @@ -223,7 +223,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). * make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... 
[#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). diff --git a/docs/changelogs/v21.4.2.10-prestable.md b/docs/changelogs/v21.4.2.10-prestable.md index 3db17ddfcf3..b9bdbd80c0c 100644 --- a/docs/changelogs/v21.4.2.10-prestable.md +++ b/docs/changelogs/v21.4.2.10-prestable.md @@ -226,7 +226,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). * make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). diff --git a/docs/changelogs/v22.11.1.1360-stable.md b/docs/changelogs/v22.11.1.1360-stable.md index 4aa110484f8..4acaffb2c3b 100644 --- a/docs/changelogs/v22.11.1.1360-stable.md +++ b/docs/changelogs/v22.11.1.1360-stable.md @@ -93,7 +93,7 @@ sidebar_label: 2022 * `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). * Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). * Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). -* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible SIGSEGV for web disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). * Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. 
[#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). * Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). * Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.6.1.1985-stable.md b/docs/changelogs/v22.6.1.1985-stable.md index c915d24fe00..7bd7038377a 100644 --- a/docs/changelogs/v22.6.1.1985-stable.md +++ b/docs/changelogs/v22.6.1.1985-stable.md @@ -160,7 +160,7 @@ sidebar_label: 2022 * fix toString error on DatatypeDate32. [#37775](https://github.com/ClickHouse/ClickHouse/pull/37775) ([LiuNeng](https://github.com/liuneng1994)). * The clickhouse-keeper setting `dead_session_check_period_ms` was transformed into microseconds (multiplied by 1000), which lead to dead sessions only being cleaned up after several minutes (instead of 500ms). [#37824](https://github.com/ClickHouse/ClickHouse/pull/37824) ([Michael Lex](https://github.com/mlex)). * Fix possible "No more packets are available" for distributed queries (in case of `async_socket_for_remote`/`use_hedged_requests` is disabled). [#37826](https://github.com/ClickHouse/ClickHouse/pull/37826) ([Azat Khuzhin](https://github.com/azat)). -* Do not drop the inner target table when executing `ALTER TABLE … MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). +* Do not drop the inner target table when executing `ALTER TABLE ... MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). * Fix directory ownership of coordination dir in clickhouse-keeper Docker image. Fixes [#37914](https://github.com/ClickHouse/ClickHouse/issues/37914). [#37915](https://github.com/ClickHouse/ClickHouse/pull/37915) ([James Maidment](https://github.com/jamesmaidment)). * Dictionaries fix custom query with update field and `{condition}`. Closes [#33746](https://github.com/ClickHouse/ClickHouse/issues/33746). [#37947](https://github.com/ClickHouse/ClickHouse/pull/37947) ([Maksim Kita](https://github.com/kitaisreal)). * Fix possible incorrect result of `SELECT ... WITH FILL` in the case when `ORDER BY` should be applied after `WITH FILL` result (e.g. for outer query). Incorrect result was caused by optimization for `ORDER BY` expressions ([#35623](https://github.com/ClickHouse/ClickHouse/issues/35623)). Closes [#37904](https://github.com/ClickHouse/ClickHouse/issues/37904). [#37959](https://github.com/ClickHouse/ClickHouse/pull/37959) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). @@ -180,7 +180,7 @@ sidebar_label: 2022 #### NO CL ENTRY * NO CL ENTRY: 'Revert "Fix mutations in tables with columns of type `Object`"'. [#37355](https://github.com/ClickHouse/ClickHouse/pull/37355) ([Alexander Tokmakov](https://github.com/tavplubix)). -* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m…"'. [#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m..."'. 
[#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Revert "Add support for preprocessing ZooKeeper operations in `clickhouse-keeper`"'. [#37534](https://github.com/ClickHouse/ClickHouse/pull/37534) ([Antonio Andelic](https://github.com/antonio2368)). * NO CL ENTRY: 'Revert "(only with zero-copy replication, non-production experimental feature not recommended to use) fix possible deadlock during fetching part"'. [#37545](https://github.com/ClickHouse/ClickHouse/pull/37545) ([Alexander Tokmakov](https://github.com/tavplubix)). * NO CL ENTRY: 'Revert "RFC: Fix converting types for UNION queries (may produce LOGICAL_ERROR)"'. [#37582](https://github.com/ClickHouse/ClickHouse/pull/37582) ([Dmitry Novik](https://github.com/novikd)). diff --git a/docs/changelogs/v22.7.1.2484-stable.md b/docs/changelogs/v22.7.1.2484-stable.md index 7464b0449ee..c4a76c66e0c 100644 --- a/docs/changelogs/v22.7.1.2484-stable.md +++ b/docs/changelogs/v22.7.1.2484-stable.md @@ -410,7 +410,7 @@ sidebar_label: 2022 * Add test for [#39132](https://github.com/ClickHouse/ClickHouse/issues/39132) [#39173](https://github.com/ClickHouse/ClickHouse/pull/39173) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Suppression for BC check (`Cannot parse string 'Hello' as UInt64`) [#39176](https://github.com/ClickHouse/ClickHouse/pull/39176) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix 01961_roaring_memory_tracking test [#39187](https://github.com/ClickHouse/ClickHouse/pull/39187) ([Dmitry Novik](https://github.com/novikd)). -* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on … [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on ... [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). * Fix exception in AsynchronousMetrics for s390x [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). * Optimize accesses to system.stack_trace (filter by name before sending signal) [#39212](https://github.com/ClickHouse/ClickHouse/pull/39212) ([Azat Khuzhin](https://github.com/azat)). * Enable warning "-Wdeprecated-dynamic-exception-spec" [#39213](https://github.com/ClickHouse/ClickHouse/pull/39213) ([Robert Schulze](https://github.com/rschu1ze)). diff --git a/docs/changelogs/v22.8.1.2097-lts.md b/docs/changelogs/v22.8.1.2097-lts.md index b6b634f4826..f9a1fa8a4a9 100644 --- a/docs/changelogs/v22.8.1.2097-lts.md +++ b/docs/changelogs/v22.8.1.2097-lts.md @@ -53,7 +53,7 @@ sidebar_label: 2022 * Store Keeper API version inside a predefined path. [#39096](https://github.com/ClickHouse/ClickHouse/pull/39096) ([Antonio Andelic](https://github.com/antonio2368)). * Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Add profile events for fsync. [#39179](https://github.com/ClickHouse/ClickHouse/pull/39179) ([Azat Khuzhin](https://github.com/azat)). 
-* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exists. [#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exist. [#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)). * Some small fixes for reading via http, allow to retry partial content in case if got 200OK. [#39244](https://github.com/ClickHouse/ClickHouse/pull/39244) ([Kseniia Sumarokova](https://github.com/kssenii)). * Improved Base58 encoding/decoding. [#39292](https://github.com/ClickHouse/ClickHouse/pull/39292) ([Andrey Zvonov](https://github.com/zvonand)). * Normalize `AggregateFunction` types and state representations because optimizations like https://github.com/ClickHouse/ClickHouse/pull/35788 will treat `count(not null columns)` as `count()`, which might confuses distributed interpreters with the following error : `Conversion from AggregateFunction(count) to AggregateFunction(count, Int64) is not supported`. [#39420](https://github.com/ClickHouse/ClickHouse/pull/39420) ([Amos Bird](https://github.com/amosbird)). diff --git a/docs/changelogs/v22.8.13.20-lts.md b/docs/changelogs/v22.8.13.20-lts.md index 0734f40bf3e..ad44fbfc5d6 100644 --- a/docs/changelogs/v22.8.13.20-lts.md +++ b/docs/changelogs/v22.8.13.20-lts.md @@ -20,4 +20,4 @@ sidebar_label: 2023 * Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). +* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un... [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v23.10.1.1976-stable.md b/docs/changelogs/v23.10.1.1976-stable.md index b08383a859b..4d093f934f1 100644 --- a/docs/changelogs/v23.10.1.1976-stable.md +++ b/docs/changelogs/v23.10.1.1976-stable.md @@ -291,7 +291,7 @@ sidebar_label: 2023 * Fix replica groups for Replicated database engine [#55587](https://github.com/ClickHouse/ClickHouse/pull/55587) ([Azat Khuzhin](https://github.com/azat)). * Remove unused protobuf includes [#55590](https://github.com/ClickHouse/ClickHouse/pull/55590) ([Raúl Marín](https://github.com/Algunenano)). * Apply Context changes to standalone Keeper [#55591](https://github.com/ClickHouse/ClickHouse/pull/55591) ([Antonio Andelic](https://github.com/antonio2368)). -* Do not fail if label-to-remove does not exists in PR [#55592](https://github.com/ClickHouse/ClickHouse/pull/55592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Do not fail if label-to-remove does not exist in PR [#55592](https://github.com/ClickHouse/ClickHouse/pull/55592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * CI: cast extra column expression `pull_request_number` to Int32 [#55599](https://github.com/ClickHouse/ClickHouse/pull/55599) ([Han Fei](https://github.com/hanfei1991)). * Add back a test that was removed by mistake [#55605](https://github.com/ClickHouse/ClickHouse/pull/55605) ([Alexander Tokmakov](https://github.com/tavplubix)). * Bump croaring to v2.0.4 [#55606](https://github.com/ClickHouse/ClickHouse/pull/55606) ([Robert Schulze](https://github.com/rschu1ze)). diff --git a/docs/changelogs/v23.11.1.2711-stable.md b/docs/changelogs/v23.11.1.2711-stable.md index e32dee41dc7..0bdee08f5c9 100644 --- a/docs/changelogs/v23.11.1.2711-stable.md +++ b/docs/changelogs/v23.11.1.2711-stable.md @@ -217,7 +217,7 @@ sidebar_label: 2023 * S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). * Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re... [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). * Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). * Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). diff --git a/docs/changelogs/v23.12.1.1368-stable.md b/docs/changelogs/v23.12.1.1368-stable.md index 1a322ae9c0f..cb8ba57100e 100644 --- a/docs/changelogs/v23.12.1.1368-stable.md +++ b/docs/changelogs/v23.12.1.1368-stable.md @@ -272,7 +272,7 @@ sidebar_label: 2023 * Bump Azure to v1.6.0 [#58052](https://github.com/ClickHouse/ClickHouse/pull/58052) ([Robert Schulze](https://github.com/rschu1ze)). * Correct values for randomization [#58058](https://github.com/ClickHouse/ClickHouse/pull/58058) ([Anton Popov](https://github.com/CurtizJ)). * Non post request should be readonly [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). -* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test… [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). +* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test... [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). * fix typo in the test 02479 [#58072](https://github.com/ClickHouse/ClickHouse/pull/58072) ([Sema Checherinda](https://github.com/CheSema)). 
* Bump Azure to 1.7.2 [#58075](https://github.com/ClickHouse/ClickHouse/pull/58075) ([Robert Schulze](https://github.com/rschu1ze)). * Fix flaky test `02567_and_consistency` [#58076](https://github.com/ClickHouse/ClickHouse/pull/58076) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v23.3.1.2823-lts.md b/docs/changelogs/v23.3.1.2823-lts.md index 0c9be3601da..f81aba53ebe 100644 --- a/docs/changelogs/v23.3.1.2823-lts.md +++ b/docs/changelogs/v23.3.1.2823-lts.md @@ -520,7 +520,7 @@ sidebar_label: 2023 * Improve script for updating clickhouse-docs [#48135](https://github.com/ClickHouse/ClickHouse/pull/48135) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix stdlib compatibility issues [#48150](https://github.com/ClickHouse/ClickHouse/pull/48150) ([DimasKovas](https://github.com/DimasKovas)). * Make test test_disallow_concurrency less flaky [#48152](https://github.com/ClickHouse/ClickHouse/pull/48152) ([Vitaly Baranov](https://github.com/vitlibar)). -* Remove unused mockSystemDatabase from gtest_transform_query_for_exter… [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). +* Remove unused mockSystemDatabase from gtest_transform_query_for_exter... [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). * Update environmental-sensors.md [#48166](https://github.com/ClickHouse/ClickHouse/pull/48166) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Correctly handle NULL constants in logical optimizer for new analyzer [#48168](https://github.com/ClickHouse/ClickHouse/pull/48168) ([Antonio Andelic](https://github.com/antonio2368)). * Try making KeeperMap test more stable [#48170](https://github.com/ClickHouse/ClickHouse/pull/48170) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/docs/changelogs/v23.5.1.3174-stable.md b/docs/changelogs/v23.5.1.3174-stable.md index 2212eb6e893..4bdd4139afc 100644 --- a/docs/changelogs/v23.5.1.3174-stable.md +++ b/docs/changelogs/v23.5.1.3174-stable.md @@ -474,7 +474,7 @@ sidebar_label: 2023 * Fix flakiness of test_distributed_load_balancing test [#49921](https://github.com/ClickHouse/ClickHouse/pull/49921) ([Azat Khuzhin](https://github.com/azat)). * Add some logging [#49925](https://github.com/ClickHouse/ClickHouse/pull/49925) ([Kseniia Sumarokova](https://github.com/kssenii)). * Support hardlinking parts transactionally [#49931](https://github.com/ClickHouse/ClickHouse/pull/49931) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e… [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e... [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). * Follow up to [#49429](https://github.com/ClickHouse/ClickHouse/issues/49429) [#49964](https://github.com/ClickHouse/ClickHouse/pull/49964) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix flaky test_ssl_cert_authentication to use urllib3 [#49982](https://github.com/ClickHouse/ClickHouse/pull/49982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). * Fix woboq codebrowser build with -Wno-poison-system-directories [#49992](https://github.com/ClickHouse/ClickHouse/pull/49992) ([Azat Khuzhin](https://github.com/azat)). 
diff --git a/docs/changelogs/v23.8.1.2992-lts.md b/docs/changelogs/v23.8.1.2992-lts.md index 7c224b19350..62326533a79 100644 --- a/docs/changelogs/v23.8.1.2992-lts.md +++ b/docs/changelogs/v23.8.1.2992-lts.md @@ -33,7 +33,7 @@ sidebar_label: 2023 * Add input format One that doesn't read any data and always returns single row with column `dummy` with type `UInt8` and value `0` like `system.one`. It can be used together with `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)). * Add tupleConcat function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)). * Support `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)). -* Add max_threads_for_indexes setting to limit number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)). +* Add max_threads_for_indexes setting to limit number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([Joris Giovannangeli](https://github.com/jorisgio)). * Add experimental support for HNSW as approximate neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). * Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)). * ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755) , [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)) Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight`. [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)). @@ -72,7 +72,7 @@ sidebar_label: 2023 * Add ability to log when max_partitions_per_insert_block is reached ... [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)). * Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)). * Updated check for connection_string as connection string with sas does not always begin with DefaultEndPoint and updated connection url to include sas token after adding container to url. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix description for filtering sets in full_sorting_merge join. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)). +* Fix description for filtering sets in full_sorting_merge join. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([ttanay](https://github.com/ttanay)). * The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime. [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)). * Fixed memory consumption in `Aggregator` when `max_block_size` is huge. 
[#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)). * Add `SYSTEM SYNC FILESYSTEM CACHE` command. It will compare in-memory state of filesystem cache with what it has on disk and fix in-memory state if needed. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)). @@ -80,10 +80,10 @@ sidebar_label: 2023 * Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)). * Function `arrayIntersect` now returns the values sorted like the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). * Add new queries, which allow to create/drop of access entities in specified access storage or move access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)). -* ALTER TABLE FREEZE are not replicated in Replicated engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)). +* ALTER TABLE FREEZE are not replicated in Replicated engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mikhail Kot](https://github.com/myrrc)). * Added possibility to flush logs to the disk on crash - Added logs buffer configuration. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix S3 table function does not work for pre-signed URL. close [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)). -* System.events and system.metrics tables add column name as an alias to event and metric. close [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)). +* Fix S3 table function does not work for pre-signed URL. close [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([Jensen](https://github.com/xiedeyantu)). +* System.events and system.metrics tables add column name as an alias to event and metric. close [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([Jensen](https://github.com/xiedeyantu)). * Added support of syntax `CREATE UNIQUE INDEX` in parser for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique=1` to ignore UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)). * Add support of predefined macro (`{database}` and `{table}`) in some kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)). * Disable updating fs cache during backup/restore. Filesystem cache must not be updated during backup/restore, it seems it just slows down the process without any profit (because the BACKUP command can read a lot of data and it's no use to put all the data to the filesystem cache and immediately evict it). 
[#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)). @@ -107,7 +107,7 @@ sidebar_label: 2023 * Use the same default paths for `clickhouse_keeper` (symlink) as for `clickhouse_keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)). * CVE-2016-2183: disable 3DES. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)). * Load filesystem cache metadata on startup in parallel. Configured by `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Improve error message for table function remote. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)). +* Improve error message for table function remote. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([Jiyoung Yoo](https://github.com/jiyoungyoooo)). * Added the possibility to specify custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)). * Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)). * Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)). @@ -127,7 +127,7 @@ sidebar_label: 2023 * Server settings asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s configured to 0 now fail gracefully instead of crash the server. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)). * Previously the caller could register the same watch callback multiple times. In that case each entry was consuming memory and the same callback was called multiple times which didn't make much sense. In order to avoid this the caller could have some logic to not add the same watch multiple times. With this change this deduplication is done internally if the watch callback is passed via shared_ptr. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)). * The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)). -* Add ability to turn off flush of Distributed tables on `DETACH`/`DROP`/server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). +* Add ability to turn off flush of Distributed tables on `DETACH`/`DROP`/server shutdown (`flush_on_detach` setting for `Distributed`). [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). 
* Domainrfc support ipv6(ip literal within square brackets). [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). * Use filter by file/path before reading in url/file/hdfs table functins. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)). * Use longer timeout for S3 CopyObject requests. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). @@ -186,71 +186,71 @@ sidebar_label: 2023 #### Bug Fix (user-visible misbehavior in an official stable release) -* Do not reset Annoy index during build-up with > 1 mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). -* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). -* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Bug fix for checksum of compress marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix mistakenly comma parsing as part of datetime in CSV best effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't throw exception when exec udf has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). -* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). -* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). -* Load suggestion only with `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). -* RFC: Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). -* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). -* Fix named collections on cluster 23.7 [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). -* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). -* Fix unexpected sort result on multi columns with nulls first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)). -* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). 
-* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). -* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). -* make regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)). -* Fix possible assert in ~PushingAsyncPipelineExecutor in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). -* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). -* Convert sparse to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). -* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). -* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). -* #2 Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* #3 Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). 
-* Forbid use_structure_from_insertion_table_in_table_functions when execute Scalar [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). -* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)). -* Fix processing single carriage return in TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix 'Context has expired' error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix timeout_overflow_mode when having subquery in the rhs of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). -* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Fix JSON_QUERY Function parse error while path is all number [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). -* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). -* bugfix: Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). -* Fix crash in join on sparse column [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). -* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). -* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix number of dropped granules in EXPLAIN PLAN index=1 [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). -* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). -* Prepared set cache in mutation pipeline stuck [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)). -* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). 
-* Try to fix bug with NULL::LowCardinality(Nullable(...)) NOT IN [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). -* transform: correctly handle default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). -* Materialized postgres: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible segfault while using PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix named_collection_admin alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix rows_before_limit_at_least for DelayedSource. [#54122](https://github.com/ClickHouse/ClickHouse/pull/54122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix results of queries utilizing the Annoy index when the part has more than one mark. [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). +* Fix usage of temporary directories during RESTORE. [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). +* Fixed binary arithmetic for Nullable(IPv4). [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes. [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Updated checkDataPart to read compress marks as compressed file by checking its extension resolves [#51337](https://github.com/ClickHouse/ClickHouse/issues/51337). [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix mistakenly comma parsing as part of datetime in CSV datetime best effort parsing. Closes [#51059](https://github.com/ClickHouse/ClickHouse/issues/51059). [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed exception when executable udf was provided with a parameter. [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed recalculation of skip indexes and projections in `ALTER DELETE` queries. [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed the infinite loop in ReadBuffer when the pos overflows the end of the buffer in MaterializedMySQL. [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). +* Do not try to load suggestions in `clickhouse-local` when a the dialect is not `clickhouse`. [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Remove mutex from CaresPTRResolver and create `ares_channel` on demand. 
Trying to fix: https://github.com/ClickHouse/ClickHouse/pull/52327#issuecomment-1643021543. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix filtering by virtual columns with OR expression (i.e. by `_table` for `Merge` engine). [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in function `tuple` with one sparse column argument. [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix named collections related statements: `if [not] exists`, `on cluster`. Closes [#51609](https://github.com/ClickHouse/ClickHouse/issues/51609). [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix reading of unnecessary column in case of multistage `PREWHERE`. [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix unexpected sort result on multi columns with nulls first direction. [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([ZhiHong Zhang](https://github.com/copperybean)). +* Keeper fix: fix data race during reconfiguration. [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* Fixed sorting of sparse columns in case of `ORDER BY ... LIMIT n` clause and large values of `n`. [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). +* Keeper fix: platforms that used poll() would delay responding to requests until the client sent a heartbeat. [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Make regexp analyzer recognize named capturing groups. [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)). +* Fix possible assert in ~PushingAsyncPipelineExecutor in clickhouse-local. [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading of empty `Nested(Array(LowCardinality(...)))` columns (added by `ALTER TABLE ... ADD COLUMN ...` query and not materialized in parts) from compact parts of `MergeTree` tables. [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed the record inconsistency in session_log between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix password leak in show create mysql table. [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Fix possible crash in full sorting merge join on sparse columns, close [#52978](https://github.com/ClickHouse/ClickHouse/issues/52978). [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). +* Fix very rare race condition with empty key prefix directory deletion in fs cache. [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed `output_format_parquet_compression_method='zstd'` producing invalid Parquet files sometimes. In older versions, use setting `output_format_parquet_use_custom_encoder = 0` as a workaround. [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). 
+* Fix query_id in part_log with async flush queries. [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible error from filesystem cache "Read unexpected size". [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable the new parquet encoder: it has a bug. [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `Not-ready Set is passed as the second argument for function 'in'` could happen with limited `max_result_rows` and ` result_overflow_mode = 'break'`. [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix character escaping in the PostgreSQL engine (`\'` -> `''`, `\\` -> `\`). Closes [#49821](https://github.com/ClickHouse/ClickHouse/issues/49821). [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixed the record inconsistency in session_log between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fixed the record inconsistency in session_log between login and logout. [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fixed adding intervals of a fraction of a second to DateTime producing incorrect result. [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix the "Context has expired" error in dictionaries when using subqueries. [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect normal projection AST format when single function is used in ORDER BY. This fixes [#52607](https://github.com/ClickHouse/ClickHouse/issues/52607). [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Forbid `use_structure_from_insertion_table_in_table_functions` when execute Scalar. Closes [#52494](https://github.com/ClickHouse/ClickHouse/issues/52494). [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). +* Avoid loading tables from lazy database when not needed Follow up to [#43840](https://github.com/ClickHouse/ClickHouse/issues/43840). [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed `system.data_skipping_indices` columns `data_compressed_bytes` and `data_uncompressed_bytes` for MaterializedMySQL. [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix processing single carriage return in TSV file segmentation engine that could lead to parsing errors. Closes [#53320](https://github.com/ClickHouse/ClickHouse/issues/53320). [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix the "Context has expired" error when using subqueries with functions `file()` (regular function, not table function), `joinGet()`, `joinGetOrNull()`, `connectionId()`. [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix timeout_overflow_mode when having subquery in the rhs of IN. 
[#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). +* This PR fixes [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152). [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Fix the JSON_QUERY function failing to parse a JSON string when the path is numeric. For example, in the query SELECT JSON_QUERY('{"123":"abcd"}', '$.123') we would encounter the exception ``` DB::Exception: Unable to parse JSONPath: While processing JSON_QUERY('{"123":"acd"}', '$.123'). (BAD_ARGUMENTS) ```. [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix possible crash for queries with parallel `FINAL` where `ORDER BY` and `PRIMARY KEY` are different in table definition. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed ReplacingMergeTree to properly process single-partition cases when `do_not_merge_across_partitions_select_final=1`. Previously `SELECT` could return rows that were marked as deleted. [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix bug in flushing of async insert queue on graceful shutdown. [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). +* Fix crash in join on sparse column. [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). +* Fix possible UB in Set skipping index for functions with incorrect args. [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible UB in inverted indexes (experimental feature). [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). +* Fixed bug for interpolate when interpolated column is aliased with the same name as a source column. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fixed a bug in EXPLAIN PLAN index=1 where the number of dropped granules was incorrect. [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). +* Correctly handle totals and extremes when `DelayedSource` is used. [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `Pipeline stuck` error in mutation with `IN (subquery WITH TOTALS)` where ready set was taken from cache. [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow to use JSON subcolumns in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([zps](https://github.com/VanDarkholme7)). +* Fix possible logical error exception during filter pushdown for full_sorting_merge join. [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). +* Fix NULL::LowCardinality(Nullable(...)) with IN. [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). +* Fixes possible crashes in `DISTINCT` queries with enabled `optimize_distinct_in_order` and sparse columns. [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
+* Correctly handle default column with multiple rows in transform. [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix crash in SQL function parseDateTime() with non-const timezone argument. [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix uncaught exception in `getCreateTableQueryImpl`. [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible segfault while using PostgreSQL engine. Closes [#36919](https://github.com/ClickHouse/ClickHouse/issues/36919). [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix `named_collection_admin` alias to `named_collection_control` not working from config. [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A distributed query could miss `rows_before_limit_at_least` in the query result in case it was executed on a replica with a delay more than `max_replica_delay_for_distributed_queries`. [#54122](https://github.com/ClickHouse/ClickHouse/pull/54122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). #### NO CL ENTRY @@ -540,7 +540,7 @@ sidebar_label: 2023 * Do not warn about arch_sys_counter clock [#53739](https://github.com/ClickHouse/ClickHouse/pull/53739) ([Artur Malchanau](https://github.com/Hexta)). * Add some profile events [#53741](https://github.com/ClickHouse/ClickHouse/pull/53741) ([Kseniia Sumarokova](https://github.com/kssenii)). * Support clang-18 (Wmissing-field-initializers) [#53751](https://github.com/ClickHouse/ClickHouse/pull/53751) ([Raúl Marín](https://github.com/Algunenano)). -* Upgrade openSSL to v3.0.10 [#53756](https://github.com/ClickHouse/ClickHouse/pull/53756) ([bhavnajindal](https://github.com/bhavnajindal)). +* Upgrade openSSL to v3.0.10 [#53756](https://github.com/ClickHouse/ClickHouse/pull/53756) ([Bhavna Jindal](https://github.com/bhavnajindal)). * Improve JSON-handling on s390x [#53760](https://github.com/ClickHouse/ClickHouse/pull/53760) ([ltrk2](https://github.com/ltrk2)). * Reduce API calls to SSM client [#53762](https://github.com/ClickHouse/ClickHouse/pull/53762) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Remove branch references from .gitmodules [#53763](https://github.com/ClickHouse/ClickHouse/pull/53763) ([Robert Schulze](https://github.com/rschu1ze)). @@ -588,3 +588,4 @@ sidebar_label: 2023 * tests: mark 02152_http_external_tables_memory_tracking as no-parallel [#54155](https://github.com/ClickHouse/ClickHouse/pull/54155) ([Azat Khuzhin](https://github.com/azat)). * The external logs have had colliding arguments [#54165](https://github.com/ClickHouse/ClickHouse/pull/54165) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Rename macro [#54169](https://github.com/ClickHouse/ClickHouse/pull/54169) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v23.8.10.43-lts.md b/docs/changelogs/v23.8.10.43-lts.md index 0093467d129..0750901da8a 100644 --- a/docs/changelogs/v23.8.10.43-lts.md +++ b/docs/changelogs/v23.8.10.43-lts.md @@ -16,17 +16,17 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). 
-* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). -* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). -* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). -* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). -* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). -* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#57565](https://github.com/ClickHouse/ClickHouse/issues/57565): Background merges correctly use temporary data storage in the cache. [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). +* Backported in [#57476](https://github.com/ClickHouse/ClickHouse/issues/57476): Fix possible broken skipping indexes after materialization in MergeTree compact parts. [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#58777](https://github.com/ClickHouse/ClickHouse/issues/58777): Fix double destroy call on exception throw in addBatchLookupTable8. [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#58856](https://github.com/ClickHouse/ClickHouse/issues/58856): Fix possible crash in JSONExtract function extracting `LowCardinality(Nullable(T))` type. [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* Backported in [#59194](https://github.com/ClickHouse/ClickHouse/issues/59194): The combination of LIMIT BY and LIMIT could produce an incorrect result in distributed queries (parallel replicas included). [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#59429](https://github.com/ClickHouse/ClickHouse/issues/59429): Fix translate() with FixedString input. Could lead to crashes as it'd return a String column (vs the expected FixedString). This issue was found through ClickHouse Bug Bounty Program YohannJardin. [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). 
+* Backported in [#60128](https://github.com/ClickHouse/ClickHouse/issues/60128): Fix error `Read beyond last offset` for `AsynchronousBoundedReadBuffer`. [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#59836](https://github.com/ClickHouse/ClickHouse/issues/59836): Fix query start time on non-initial queries. [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#59758](https://github.com/ClickHouse/ClickHouse/issues/59758): Fix leftPad / rightPad function with FixedString input. [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60304](https://github.com/ClickHouse/ClickHouse/issues/60304): Fix having neither acked nor nacked messages. If an exception happens during the read-write phase, messages will be nacked. [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#60171](https://github.com/ClickHouse/ClickHouse/issues/60171): Fix cosineDistance crash with Nullable. [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). #### NO CL ENTRY diff --git a/docs/changelogs/v23.8.11.28-lts.md b/docs/changelogs/v23.8.11.28-lts.md index acc284caa72..3da3d10cfa5 100644 --- a/docs/changelogs/v23.8.11.28-lts.md +++ b/docs/changelogs/v23.8.11.28-lts.md @@ -12,11 +12,11 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). -* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). -* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#60983](https://github.com/ClickHouse/ClickHouse/issues/60983): Fix buffer overflow that can happen if the attacker asks the HTTP server to decompress data with a composition of codecs and size triggering numeric overflow. Fix buffer overflow that can happen inside codec NONE on wrong input data. This was submitted by TIANGONG research team through our [Bug Bounty program](https://github.com/ClickHouse/ClickHouse/issues/38986). [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#60986](https://github.com/ClickHouse/ClickHouse/issues/60986): Functions for SQL/JSON were able to read uninitialized memory. This closes [#60017](https://github.com/ClickHouse/ClickHouse/issues/60017). Found by Fuzzer. [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#60816](https://github.com/ClickHouse/ClickHouse/issues/60816): Fix crash in arrayEnumerateRanked.
[#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60837](https://github.com/ClickHouse/ClickHouse/issues/60837): Fix crash when using input() in INSERT SELECT JOIN. Closes [#60035](https://github.com/ClickHouse/ClickHouse/issues/60035). [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#60911](https://github.com/ClickHouse/ClickHouse/issues/60911): Avoid segfault if too many keys are skipped when reading from S3. [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). #### NO CL ENTRY diff --git a/docs/changelogs/v23.8.12.13-lts.md b/docs/changelogs/v23.8.12.13-lts.md index dbb36fdc00e..0329d4349f3 100644 --- a/docs/changelogs/v23.8.12.13-lts.md +++ b/docs/changelogs/v23.8.12.13-lts.md @@ -9,9 +9,9 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). +* Backported in [#61439](https://github.com/ClickHouse/ClickHouse/issues/61439): The query cache now denies access to entries when the user is re-created or assumes another role. This prevents attacks where 1. a user with the same name as a dropped user may access the old user's cache entries or 2. a user with a different role may access cache entries of a role with a different row policy. [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#61572](https://github.com/ClickHouse/ClickHouse/issues/61572): Fix string search with constant start position which previously could lead to memory corruption. [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#61854](https://github.com/ClickHouse/ClickHouse/issues/61854): Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` when specifying an incorrect UTF-8 sequence. Example: [#61714](https://github.com/ClickHouse/ClickHouse/issues/61714#issuecomment-2012768202). [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v23.8.13.25-lts.md b/docs/changelogs/v23.8.13.25-lts.md index 3452621556a..e9c6e2e9f28 100644 --- a/docs/changelogs/v23.8.13.25-lts.md +++ b/docs/changelogs/v23.8.13.25-lts.md @@ -15,11 +15,11 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix REPLACE/MOVE PARTITION with zero-copy replication [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Cancel merges before removing moved parts [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). -* Try to fix segfault in Hive engine [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#62898](https://github.com/ClickHouse/ClickHouse/issues/62898): Fixed a bug in zero-copy replication (an experimental feature) that could cause `The specified key does not exist` errors and data loss after REPLACE/MOVE PARTITION. A similar issue might happen with TTL-moves between disks. [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#61964](https://github.com/ClickHouse/ClickHouse/issues/61964): Fix the ATTACH query with the ON CLUSTER clause when the database does not exist on the initiator node. Closes [#55009](https://github.com/ClickHouse/ClickHouse/issues/55009). [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#62527](https://github.com/ClickHouse/ClickHouse/issues/62527): Fix data race between `MOVE PARTITION` query and merges resulting in intersecting parts. [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#62238](https://github.com/ClickHouse/ClickHouse/issues/62238): Fix skipping escape sequence parsing errors during JSON data parsing while using `input_format_allow_errors_num/ratio` settings. [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#62673](https://github.com/ClickHouse/ClickHouse/issues/62673): Fix segmentation fault when using Hive table engine. Reference [#62154](https://github.com/ClickHouse/ClickHouse/issues/62154), [#62560](https://github.com/ClickHouse/ClickHouse/issues/62560). [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v23.8.14.6-lts.md b/docs/changelogs/v23.8.14.6-lts.md index 0053502a9dc..3236c931e51 100644 --- a/docs/changelogs/v23.8.14.6-lts.md +++ b/docs/changelogs/v23.8.14.6-lts.md @@ -9,6 +9,6 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Set server name for SSL handshake in MongoDB engine [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)). -* Use user specified db instead of "config" for MongoDB wire protocol version check [#63126](https://github.com/ClickHouse/ClickHouse/pull/63126) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#63172](https://github.com/ClickHouse/ClickHouse/issues/63172): Setting server_name might help with recently reported SSL handshake error when connecting to MongoDB Atlas: `Poco::Exception. Code: 1000, e.code() = 0, SSL Exception: error:10000438:SSL routines:OPENSSL_internal:TLSV1_ALERT_INTERNAL_ERROR`. [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)).
+* Backported in [#63164](https://github.com/ClickHouse/ClickHouse/issues/63164): The wire protocol version check for MongoDB used to try accessing "config" database, but this can fail if the user doesn't have permissions for it. The fix is to use the database name provided by user. [#63126](https://github.com/ClickHouse/ClickHouse/pull/63126) ([Alexander Gololobov](https://github.com/davenger)). diff --git a/docs/changelogs/v23.8.2.7-lts.md b/docs/changelogs/v23.8.2.7-lts.md index 317e2c6d56a..a6f74e7998c 100644 --- a/docs/changelogs/v23.8.2.7-lts.md +++ b/docs/changelogs/v23.8.2.7-lts.md @@ -9,8 +9,8 @@ sidebar_label: 2023 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix: parallel replicas over distributed don't read from all replicas [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix: allow IPv6 for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#54209](https://github.com/ClickHouse/ClickHouse/issues/54209): Parallel reading from replicas over Distributed table was using only one replica per shard. [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#54233](https://github.com/ClickHouse/ClickHouse/issues/54233): Allow IPv6 for bloom filter, backward compatibility issue. [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). #### NOT FOR CHANGELOG / INSIGNIFICANT diff --git a/docs/changelogs/v23.8.3.48-lts.md b/docs/changelogs/v23.8.3.48-lts.md index af669c5adc8..91514f48a25 100644 --- a/docs/changelogs/v23.8.3.48-lts.md +++ b/docs/changelogs/v23.8.3.48-lts.md @@ -18,19 +18,19 @@ sidebar_label: 2023 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix: moved to prewhere condition actions can lose column [#53492](https://github.com/ClickHouse/ClickHouse/pull/53492) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix: parallel replicas over distributed with prefer_localhost_replica=1 [#54334](https://github.com/ClickHouse/ClickHouse/pull/54334) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix possible error 'URI contains invalid characters' in s3 table function [#54373](https://github.com/ClickHouse/ClickHouse/pull/54373) ([Kruglov Pavel](https://github.com/Avogar)). -* Check for overflow before addition in `analysisOfVariance` function [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)). -* reproduce and fix the bug in removeSharedRecursive [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)). -* Fix aggregate projections with normalized states [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)). -* Fix possible parsing error in WithNames formats with disabled input_format_with_names_use_header [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix zero copy garbage [#54550](https://github.com/ClickHouse/ClickHouse/pull/54550) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix race in `ColumnUnique` [#54575](https://github.com/ClickHouse/ClickHouse/pull/54575) ([Nikita Taranov](https://github.com/nickitat)). 
-* Fix serialization of `ColumnDecimal` [#54601](https://github.com/ClickHouse/ClickHouse/pull/54601) ([Nikita Taranov](https://github.com/nickitat)). -* Fix virtual columns having incorrect values after ORDER BY [#54811](https://github.com/ClickHouse/ClickHouse/pull/54811) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix Keeper segfault during shutdown [#54841](https://github.com/ClickHouse/ClickHouse/pull/54841) ([Antonio Andelic](https://github.com/antonio2368)). -* Rebuild minmax_count_projection when partition key gets modified [#54943](https://github.com/ClickHouse/ClickHouse/pull/54943) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#54974](https://github.com/ClickHouse/ClickHouse/issues/54974): Fixed issue when during prewhere optimization compound condition actions DAG can lose output column of intermediate step while this column is required as an input column of some next step. [#53492](https://github.com/ClickHouse/ClickHouse/pull/53492) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#54996](https://github.com/ClickHouse/ClickHouse/issues/54996): Parallel replicas either executed completely on the local replica or produce an incorrect result when `prefer_localhost_replica=1`. Fixes [#54276](https://github.com/ClickHouse/ClickHouse/issues/54276). [#54334](https://github.com/ClickHouse/ClickHouse/pull/54334) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#54516](https://github.com/ClickHouse/ClickHouse/issues/54516): Fix possible error 'URI contains invalid characters' in s3 table function. Closes [#54345](https://github.com/ClickHouse/ClickHouse/issues/54345). [#54373](https://github.com/ClickHouse/ClickHouse/pull/54373) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#54418](https://github.com/ClickHouse/ClickHouse/issues/54418): Check for overflow when handling group number argument for `analysisOfVariance` to avoid crashes. Crash found using WINGFUZZ. [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#54527](https://github.com/ClickHouse/ClickHouse/issues/54527): Reproduce the bug described here [#54135](https://github.com/ClickHouse/ClickHouse/issues/54135). [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)). +* Backported in [#54854](https://github.com/ClickHouse/ClickHouse/issues/54854): Fix incorrect aggregation projection optimization when using variant aggregate states. This optimization is accidentally enabled but not properly implemented, because after https://github.com/ClickHouse/ClickHouse/pull/39420 the comparison of DataTypeAggregateFunction is normalized. This fixes [#54406](https://github.com/ClickHouse/ClickHouse/issues/54406). [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#54599](https://github.com/ClickHouse/ClickHouse/issues/54599): Fix parsing error in WithNames formats while reading subset of columns with disabled input_format_with_names_use_header. Closes [#52591](https://github.com/ClickHouse/ClickHouse/issues/52591). [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#54594](https://github.com/ClickHouse/ClickHouse/issues/54594): Starting from version 23.5, zero-copy replication could leave some garbage in ZooKeeper and on S3. 
It might happen on removal of Outdated parts that were mutated. The issue is indicated by `Failed to get mutation parent on {} for part {}, refusing to remove blobs` log messages. [#54550](https://github.com/ClickHouse/ClickHouse/pull/54550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#54627](https://github.com/ClickHouse/ClickHouse/issues/54627): Fix unsynchronised write to a shared variable in `ColumnUnique`. [#54575](https://github.com/ClickHouse/ClickHouse/pull/54575) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#54625](https://github.com/ClickHouse/ClickHouse/issues/54625): Fix serialization of `ColumnDecimal`. [#54601](https://github.com/ClickHouse/ClickHouse/pull/54601) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#54945](https://github.com/ClickHouse/ClickHouse/issues/54945): Fixed virtual columns (e.g. _file) showing incorrect values with ORDER BY. [#54811](https://github.com/ClickHouse/ClickHouse/pull/54811) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#54872](https://github.com/ClickHouse/ClickHouse/issues/54872): Keeper fix: correctly capture a variable in callback to avoid segfaults during shutdown. [#54841](https://github.com/ClickHouse/ClickHouse/pull/54841) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#54950](https://github.com/ClickHouse/ClickHouse/issues/54950): Fix projection optimization error if table's partition key was ALTERed by extending its Enum type. The fix is to rebuild `minmax_count_projection` when partition key gets modified. This fixes [#54941](https://github.com/ClickHouse/ClickHouse/issues/54941). [#54943](https://github.com/ClickHouse/ClickHouse/pull/54943) ([Amos Bird](https://github.com/amosbird)). #### NOT FOR CHANGELOG / INSIGNIFICANT diff --git a/docs/changelogs/v23.8.4.69-lts.md b/docs/changelogs/v23.8.4.69-lts.md index 065a57549be..a6d8d8bb03b 100644 --- a/docs/changelogs/v23.8.4.69-lts.md +++ b/docs/changelogs/v23.8.4.69-lts.md @@ -11,26 +11,26 @@ sidebar_label: 2023 * Backported in [#55673](https://github.com/ClickHouse/ClickHouse/issues/55673): If the database is already initialized, it doesn't need to be initialized again upon subsequent launches. This can potentially fix the issue of infinite container restarts when the database fails to load within 1000 attempts (relevant for very large databases and multi-node setups). [#50724](https://github.com/ClickHouse/ClickHouse/pull/50724) ([Alexander Nikolaev](https://github.com/AlexNik)). * Backported in [#55293](https://github.com/ClickHouse/ClickHouse/issues/55293): Resource with source code including submodules is built in Darwin special build task. It may be used to build ClickHouse without checkouting submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)). * Backported in [#55366](https://github.com/ClickHouse/ClickHouse/issues/55366): Solve issue with launching standalone clickhouse-keeper from clickhouse-server package. [#55226](https://github.com/ClickHouse/ClickHouse/pull/55226) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Backported in [#55725](https://github.com/ClickHouse/ClickHouse/issues/55725): Fix integration check python script to use gh api url - Add Readme for CI tests. [#55716](https://github.com/ClickHouse/ClickHouse/pull/55716) ([Max K.](https://github.com/mkaynov)). 
+* Backported in [#55725](https://github.com/ClickHouse/ClickHouse/issues/55725): Fix integration check python script to use gh api url - Add Readme for CI tests. [#55716](https://github.com/ClickHouse/ClickHouse/pull/55716) ([Max K.](https://github.com/maxknv)). #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix "Invalid number of rows in Chunk" in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Move obsolete format settings to separate section [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix: insert quorum w/o keeper retries [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)). -* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Proper cleanup in case of exception in ctor of ShellCommandSource [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)). -* Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)). -* Fix for background download in fs cache [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix functions execution over sparse columns [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)). -* Fix bug with inability to drop detached partition in replicated merge tree on top of S3 without zero copy [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)). -* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)). -* Fix filtering by virtual columns with OR filter in query [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)). -* Fix MongoDB connection issues [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)). -* Destroy fiber in case of exception in cancelBefore in AsyncTaskExecutor [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash in QueryNormalizer with cyclic aliases [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)). -* Fix filtering by virtual columns with OR filter in query (resubmit) [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#55304](https://github.com/ClickHouse/ClickHouse/issues/55304): Fix "Invalid number of rows in Chunk" in MaterializedPostgreSQL (which could happen with PostgreSQL version >= 13). [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Backported in [#55018](https://github.com/ClickHouse/ClickHouse/issues/55018): Move obsolete format settings to a separate section and use them together with all format settings to avoid `Unknown setting` exceptions during use of obsolete format settings. Closes [#54792](https://github.com/ClickHouse/ClickHouse/issues/54792). [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#55097](https://github.com/ClickHouse/ClickHouse/issues/55097): Insert quorum could be marked as satisfied incorrectly in case of keeper retries while waiting for the quorum. Fixes [#54543](https://github.com/ClickHouse/ClickHouse/issues/54543). [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#55473](https://github.com/ClickHouse/ClickHouse/issues/55473): Prevent attaching partitions from tables that don't have the same indices or projections defined. [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#55461](https://github.com/ClickHouse/ClickHouse/issues/55461): If an exception happens in `ShellCommandSource` constructor after some of the `send_data_threads` are started, they need to be join()-ed, otherwise abort() will be triggered in `ThreadFromGlobalPool` destructor. Fixes [#55091](https://github.com/ClickHouse/ClickHouse/issues/55091). [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#55412](https://github.com/ClickHouse/ClickHouse/issues/55412): Fix deadlock in LDAP assigned role update for non-existing ClickHouse roles. [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)). +* Backported in [#55323](https://github.com/ClickHouse/ClickHouse/issues/55323): Fix for background download in fs cache. [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#55349](https://github.com/ClickHouse/ClickHouse/issues/55349): Fix functions execution over sparse columns (fixes `DB::Exception: isDefaultAt is not implemented for Function: while executing 'FUNCTION Capture` error). [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#55475](https://github.com/ClickHouse/ClickHouse/issues/55475): Fix an issue with inability to drop detached partition in `ReplicatedMergeTree` engines family on top of S3 (without zero-copy replication). Fixes issue [#55225](https://github.com/ClickHouse/ClickHouse/issues/55225). Fix bug with abandoned blobs on S3 for complex data types like Arrays or Nested columns. Partially fixes [#52393](https://github.com/ClickHouse/ClickHouse/issues/52393). Many kudos to @alifirat for examples. [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)). +* Backported in [#55399](https://github.com/ClickHouse/ClickHouse/issues/55399): An optimization introduced one year ago was wrong. This closes [#55272](https://github.com/ClickHouse/ClickHouse/issues/55272). [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#55437](https://github.com/ClickHouse/ClickHouse/issues/55437): Fix parsing of arrays in cast operator (`::`). [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#55635](https://github.com/ClickHouse/ClickHouse/issues/55635): Fix filtering by virtual columns with OR filter in query (`_part*` filtering for `MergeTree`, `_path`/`_file` for various `File`/`HDFS`/... engines, `_table` for `Merge`). [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#55445](https://github.com/ClickHouse/ClickHouse/issues/55445): Fix connection issues that occurred with some versions of MongoDB. Closes [#55376](https://github.com/ClickHouse/ClickHouse/issues/55376), [#55232](https://github.com/ClickHouse/ClickHouse/issues/55232). [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#55534](https://github.com/ClickHouse/ClickHouse/issues/55534): Fix possible deadlock caused by not destroyed fiber in case of exception in async task cancellation. Closes [#55185](https://github.com/ClickHouse/ClickHouse/issues/55185). [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#55747](https://github.com/ClickHouse/ClickHouse/issues/55747): Fix crash in QueryNormalizer with cyclic aliases. [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)). +* Backported in [#55760](https://github.com/ClickHouse/ClickHouse/issues/55760): Fix filtering by virtual columns with OR filter in query (_part* filtering for MergeTree, _path/_file for various File/HDFS/... engines, _table for Merge). [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)). #### NO CL CATEGORY @@ -46,6 +46,6 @@ sidebar_label: 2023 * Clean data dir and always start an old server version in aggregate functions compatibility test. [#55105](https://github.com/ClickHouse/ClickHouse/pull/55105) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * check if block is empty after async insert retries [#55143](https://github.com/ClickHouse/ClickHouse/pull/55143) ([Han Fei](https://github.com/hanfei1991)). * MaterializedPostgreSQL: remove back check [#55297](https://github.com/ClickHouse/ClickHouse/pull/55297) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mike Kot](https://github.com/myrrc)). +* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mikhail Kot](https://github.com/myrrc)). * Bump curl to 8.4 [#55492](https://github.com/ClickHouse/ClickHouse/pull/55492) ([Robert Schulze](https://github.com/rschu1ze)). diff --git a/docs/changelogs/v23.8.5.16-lts.md b/docs/changelogs/v23.8.5.16-lts.md index 4a23b8892be..32ddbd6031d 100644 --- a/docs/changelogs/v23.8.5.16-lts.md +++ b/docs/changelogs/v23.8.5.16-lts.md @@ -12,9 +12,9 @@ sidebar_label: 2023 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix storage Iceberg files retrieval [#55144](https://github.com/ClickHouse/ClickHouse/pull/55144) ([Kseniia Sumarokova](https://github.com/kssenii)). 
-* Try to fix possible segfault in Native ORC input format [#55891](https://github.com/ClickHouse/ClickHouse/pull/55891) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix window functions in case of sparse columns. [#55895](https://github.com/ClickHouse/ClickHouse/pull/55895) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#55736](https://github.com/ClickHouse/ClickHouse/issues/55736): Fix iceberg metadata parsing - delete files were not checked. [#55144](https://github.com/ClickHouse/ClickHouse/pull/55144) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#55969](https://github.com/ClickHouse/ClickHouse/issues/55969): Try to fix possible segfault in Native ORC input format. Closes [#55873](https://github.com/ClickHouse/ClickHouse/issues/55873). [#55891](https://github.com/ClickHouse/ClickHouse/pull/55891) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#55907](https://github.com/ClickHouse/ClickHouse/issues/55907): Fix window functions in case of sparse columns. Previously some queries with window functions returned invalid results or made ClickHouse crash when the columns were sparse. [#55895](https://github.com/ClickHouse/ClickHouse/pull/55895) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). #### NOT FOR CHANGELOG / INSIGNIFICANT diff --git a/docs/changelogs/v23.8.6.16-lts.md b/docs/changelogs/v23.8.6.16-lts.md index 6eb752e987c..df6c03cd668 100644 --- a/docs/changelogs/v23.8.6.16-lts.md +++ b/docs/changelogs/v23.8.6.16-lts.md @@ -9,11 +9,11 @@ sidebar_label: 2023 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix rare case of CHECKSUM_DOESNT_MATCH error [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)). -* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#54583](https://github.com/ClickHouse/ClickHouse/issues/54583): Fix rare bug in replicated merge tree which could lead to self-recovering `CHECKSUM_DOESNT_MATCH` error in logs. [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)). +* Backported in [#56253](https://github.com/ClickHouse/ClickHouse/issues/56253): Fixed a bug where the match() function (regex) with a pattern containing alternation produced an incorrect key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#56322](https://github.com/ClickHouse/ClickHouse/issues/56322): Fix a crash during table loading on startup. Closes [#55767](https://github.com/ClickHouse/ClickHouse/issues/55767). [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#56292](https://github.com/ClickHouse/ClickHouse/issues/56292): Fix segfault in signal handler for Keeper.
[#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#56443](https://github.com/ClickHouse/ClickHouse/issues/56443): Fix crash due to buffer overflow while decompressing malformed data using `T64` codec. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### NOT FOR CHANGELOG / INSIGNIFICANT diff --git a/docs/changelogs/v23.8.7.24-lts.md b/docs/changelogs/v23.8.7.24-lts.md index 37862c17315..042484e2404 100644 --- a/docs/changelogs/v23.8.7.24-lts.md +++ b/docs/changelogs/v23.8.7.24-lts.md @@ -12,12 +12,12 @@ sidebar_label: 2023 #### Bug Fix (user-visible misbehavior in an official stable release) -* Select from system tables when table based on table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#56581](https://github.com/ClickHouse/ClickHouse/issues/56581): Prevent reference to a remote data source for the `data_paths` column in `system.tables` if the table is created with a table function using explicit column description. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Backported in [#56877](https://github.com/ClickHouse/ClickHouse/issues/56877): Fix incomplete query result for `UNION` in `view()` table function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#56409](https://github.com/ClickHouse/ClickHouse/issues/56409): Prohibit adding a column with type `Object(JSON)` to an existing table. This closes: [#56095](https://github.com/ClickHouse/ClickHouse/issues/56095) This closes: [#49944](https://github.com/ClickHouse/ClickHouse/issues/49944). [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#56756](https://github.com/ClickHouse/ClickHouse/issues/56756): Fix a segfault caused by a thrown exception in Kerberos initialization during the creation of the Kafka table. Closes [#56073](https://github.com/ClickHouse/ClickHouse/issues/56073). [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Backported in [#56748](https://github.com/ClickHouse/ClickHouse/issues/56748): Fixed the issue that the RabbitMQ table engine wasn't able to connect to RabbitMQ over a secure connection. [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#56839](https://github.com/ClickHouse/ClickHouse/issues/56839): The server crashed when decompressing malformed data using the `FPC` codec. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### NO CL CATEGORY diff --git a/docs/changelogs/v23.8.8.20-lts.md b/docs/changelogs/v23.8.8.20-lts.md index 345cfcccf17..f45498cb61f 100644 --- a/docs/changelogs/v23.8.8.20-lts.md +++ b/docs/changelogs/v23.8.8.20-lts.md @@ -16,9 +16,9 @@ sidebar_label: 2023 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). -* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#57111](https://github.com/ClickHouse/ClickHouse/issues/57111): Fix ON CLUSTER queries without the database being present on an initial node. Closes [#55009](https://github.com/ClickHouse/ClickHouse/issues/55009). [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#57169](https://github.com/ClickHouse/ClickHouse/issues/57169): Fix crash due to buffer overflow while decompressing malformed data using `Gorilla` codec. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#57175](https://github.com/ClickHouse/ClickHouse/issues/57175): Close interserver connection for any exception that happens before the authentication. This issue was found with [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). #### NOT FOR CHANGELOG / INSIGNIFICANT diff --git a/docs/changelogs/v23.8.9.54-lts.md b/docs/changelogs/v23.8.9.54-lts.md index 00607c60c39..db13238f4ad 100644 --- a/docs/changelogs/v23.8.9.54-lts.md +++ b/docs/changelogs/v23.8.9.54-lts.md @@ -11,29 +11,29 @@ sidebar_label: 2024 * Backported in [#57668](https://github.com/ClickHouse/ClickHouse/issues/57668): Output valid JSON/XML on excetpion during HTTP query execution. Add setting `http_write_exception_in_output_format` to enable/disable this behaviour (enabled by default). [#52853](https://github.com/ClickHouse/ClickHouse/pull/52853) ([Kruglov Pavel](https://github.com/Avogar)). 
* Backported in [#58491](https://github.com/ClickHouse/ClickHouse/issues/58491): Fix transfer query to MySQL compatible query. Fixes [#57253](https://github.com/ClickHouse/ClickHouse/issues/57253). Fixes [#52654](https://github.com/ClickHouse/ClickHouse/issues/52654). Fixes [#56729](https://github.com/ClickHouse/ClickHouse/issues/56729). [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)). * Backported in [#57238](https://github.com/ClickHouse/ClickHouse/issues/57238): Fetching a part waits when that part is fully committed on remote replica. It is better not send part in PreActive state. In case of zero copy this is mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)). -* Backported in [#57655](https://github.com/ClickHouse/ClickHouse/issues/57655): Handle sigabrt case when getting PostgreSQl table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)). +* Backported in [#57655](https://github.com/ClickHouse/ClickHouse/issues/57655): Handle sigabrt case when getting PostgreSQl table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mikhail Kot](https://github.com/myrrc)). #### Build/Testing/Packaging Improvement * Backported in [#57582](https://github.com/ClickHouse/ClickHouse/issues/57582): Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). #### Bug Fix (user-visible misbehavior in an official stable release) -* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). -* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). -* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). -* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). -* bugfix: correctly parse SYSTEM STOP LISTEN TCP SECURE [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)). -* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). -* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)). -* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#58324](https://github.com/ClickHouse/ClickHouse/issues/58324): Flatten only true Nested type if flatten_nested=1, not all Array(Tuple). [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#57395](https://github.com/ClickHouse/ClickHouse/issues/57395): Fix ALTER COLUMN with ALIAS that previously threw the `NO_SUCH_COLUMN_IN_TABLE` exception. Closes [#50927](https://github.com/ClickHouse/ClickHouse/issues/50927). [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#57449](https://github.com/ClickHouse/ClickHouse/issues/57449): Now ALTER columns which are incompatible with columns used in some projections will be forbidden. Previously it could result in incorrect data. This fixes [#56932](https://github.com/ClickHouse/ClickHouse/issues/56932). This PR also allows RENAME of index columns, and improves the exception message by providing clear information on the affected indices or projections causing the prevention. [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#57281](https://github.com/ClickHouse/ClickHouse/issues/57281): Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column. Closes [#42918](https://github.com/ClickHouse/ClickHouse/issues/42918). [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#57247](https://github.com/ClickHouse/ClickHouse/issues/57247): Fix incorrect JOIN plan optimization with partially materialized normal projection. This fixes [#57194](https://github.com/ClickHouse/ClickHouse/issues/57194). [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#57346](https://github.com/ClickHouse/ClickHouse/issues/57346): Fix `ReadonlyReplica` metric for some cases (e.g. when a table cannot be initialized because of difference in local and Keeper data). [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#58434](https://github.com/ClickHouse/ClickHouse/issues/58434): Fix working with read buffers in StreamingFormatExecutor, previously it could lead to segfaults in Kafka and other streaming engines. [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Backported in [#57539](https://github.com/ClickHouse/ClickHouse/issues/57539): Fix parsing of `SYSTEM STOP LISTEN TCP SECURE`. [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)). +* Backported in [#57779](https://github.com/ClickHouse/ClickHouse/issues/57779): Ignore the ON CLUSTER clause in GRANT/REVOKE queries for management of replicated access entities (controlled by the `ignore_on_cluster_for_replicated_access_entities_queries` setting). [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Backported in [#58256](https://github.com/ClickHouse/ClickHouse/issues/58256): Disable system.kafka_consumers by default (due to possible live memory leak). [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#57923](https://github.com/ClickHouse/ClickHouse/issues/57923): Fix invalid memory access in BLAKE3. [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#58084](https://github.com/ClickHouse/ClickHouse/issues/58084): Normalize function names in `CREATE INDEX` query. Avoid `Existing table metadata in ZooKeeper differs in skip indexes` errors if an alias was used instead of the canonical function name when creating an index. [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#58110](https://github.com/ClickHouse/ClickHouse/issues/58110): Keeper fix: Leader should correctly fail on preprocessing a request if it is not initialized. [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#58155](https://github.com/ClickHouse/ClickHouse/issues/58155): Fix Integer overflow in Poco::UTF32Encoding. [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)). +* Backported in [#58188](https://github.com/ClickHouse/ClickHouse/issues/58188): Parallel parsing for `JSONCompactEachRow` could work incorrectly in previous versions. This closes [#58180](https://github.com/ClickHouse/ClickHouse/issues/58180). [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#58301](https://github.com/ClickHouse/ClickHouse/issues/58301): Fix parallel parsing for JSONCompactEachRow. [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)). #### NO CL ENTRY diff --git a/docs/changelogs/v24.1.1.2048-stable.md b/docs/changelogs/v24.1.1.2048-stable.md index 8e4647da86e..c509ce0058e 100644 --- a/docs/changelogs/v24.1.1.2048-stable.md +++ b/docs/changelogs/v24.1.1.2048-stable.md @@ -133,56 +133,56 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Add join keys conversion for nested lowcardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)). -* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix a bug with projections and the aggregate_functions_null_for_empty setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)). -* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)). -* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). -* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)). -* [RFC] Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)). -* Fix the exprs order bug in group by with rollup [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)). -* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Allow users to work with symlinks in user_files_path (again) [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)). -* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)). -* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). -* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)). -* Enable ordinary databases while restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)). -* Fix hive threadpool read ORC/Parquet/... Failed [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)). -* Hide credentials in system.backup_log base_backup_name column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)). -* toStartOfInterval for milli- microsencods values rounding [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)). -* Fix join using nullable in old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)). -* `makeDateTime64()`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)). 
-* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)). -* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). -* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). -* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Segfault in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). -* Table CREATE DROP Poco::Logger memory leak fix [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix HTTP compressors finalization [#58846](https://github.com/ClickHouse/ClickHouse/pull/58846) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Restriction for the access key id for s3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash when indexHint() is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)). -* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)). -* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)). -* `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)). -* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)). -* Fix aggregation issue in mixed x86_64 and ARM clusters [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)). 
-* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)). -* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)). -* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix lazy initialization in RabbitMQ [#59352](https://github.com/ClickHouse/ClickHouse/pull/59352) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible errors when joining sub-types with low cardinality (e.g., Array(LowCardinality(T)) with Array(T)). [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)). +* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple). [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting during insertion. This is an addition to [#42198](https://github.com/ClickHouse/ClickHouse/issues/42198) and [#49873](https://github.com/ClickHouse/ClickHouse/issues/49873). The bug was found by fuzzer in [#56666](https://github.com/ClickHouse/ClickHouse/issues/56666). This PR also fixes potential issues with projections and the `transform_null_in` setting. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)). +* Fixed a (rare) exception in case the user's assigned profiles are updated right after the user logs in, which could cause a missing entry in `session_log` or problems with logging in. [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix working with read buffers in StreamingFormatExecutor, previously it could lead to segfaults in Kafka and other streaming engines. [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). +* Ignore MVs with a dropped target table when pushing to views during an insert into a source table. [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)). +* Eliminate possible race between ALTER_METADATA and MERGE_PARTS (that leads to checksum mismatch - CHECKSUM_DOESNT_MATCH). [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)). +* Fix the expression order bug in GROUP BY with ROLLUP. [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)). +* Fix a bug in zero-copy-replication (an experimental feature) that could lead to `The specified key does not exist` error and data loss. It could happen when dropping a replica with broken or unexpected/ignored detached parts. Fixes [#57985](https://github.com/ClickHouse/ClickHouse/issues/57985).
[#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix a bug where users could not work with symlinks in `user_files_path`. [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)). +* Fix a segfault when a Graphite table does not have an aggregate function. [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)). +* Fix reading multiple times from KafkaEngine in materialized views. [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `Part ... intersects part ...` error that might occur in `ReplicatedMergeTree` when the server was restarted just after [automatically] dropping [an empty] part and adjacent parts were merged. The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/56282. [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable MergeTreePrefetchedReadPool for LIMIT-only queries, because the time spent filling per-thread tasks can be greater than the whole query execution time for big tables with a small limit. [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)). +* While a `restore` is underway in ClickHouse, allow restoring a database with the `Ordinary` engine. [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)). +* Fix read buffer creation in Hive engine when the thread_pool read method is used. Closes [#57978](https://github.com/ClickHouse/ClickHouse/issues/57978). [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)). +* Hide credentials in `base_backup_name` column of `system.backup_log`. [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* While executing queries like `SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(1));`, the result was not rounded to 1 millisecond previously. The current PR solves this issue. It also solves some problems appearing in https://github.com/ClickHouse/ClickHouse/pull/56738. [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix logical error in `parallel_hash` working with `max_joined_block_size_rows`. [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)). +* Fix error in join with `USING` when one of the tables has a `Nullable` key. [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)). +* The (optional) `fraction` argument in function `makeDateTime64()` can now be non-const. This was possible already with ClickHouse <= 23.8. [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible server crash during symbolizing inline frames. [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)). +* The query cache now denies access to entries when the user is re-created or assumes another role. This prevents attacks where 1. a user with the same name as a dropped user may access the old user's cache entries or 2.
a user with a different role may access cache entries of a role with a different row policy. [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix broken partition key analysis when doing projection optimization with `force_index_by_date = 1`. This fixes [#58620](https://github.com/ClickHouse/ClickHouse/issues/58620). We don't need partition key analysis for projections after https://github.com/ClickHouse/ClickHouse/pull/56502. [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)). +* The query cache now behaves properly when per-user quotas are defined and `SYSTEM DROP QUERY CACHE` was run. [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix data stream partitioning for window functions when there are different window descriptions with similar prefixes but different partitioning. Fixes [#58714](https://github.com/ClickHouse/ClickHouse/issues/58714). [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). +* Fix double destroy call on exception throw in addBatchLookupTable8. [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Keeper fix: don't process requests during shutdown because it would lead to an invalid state. [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a crash in the polygon dictionary. Fixes [#58612](https://github.com/ClickHouse/ClickHouse/issues/58612). [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix possible crash in JSONExtract function extracting `LowCardinality(Nullable(T))` type. [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* Fix a `Poco::Logger` memory leak on table CREATE/DROP. Closes [#57931](https://github.com/ClickHouse/ClickHouse/issues/57931). Closes [#58496](https://github.com/ClickHouse/ClickHouse/issues/58496). [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix HTTP compressors. Follow-up to [#58475](https://github.com/ClickHouse/ClickHouse/issues/58475). [#58846](https://github.com/ClickHouse/ClickHouse/pull/58846) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix reading multiple times from FileLog engine in materialized views. [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Prevent specifying an `access_key_id` that does not match the [correct pattern](https://docs.aws.amazon.com/IAM/latest/APIReference/API_AccessKey.html). [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible crash in clickhouse-local during loading suggestions. Closes [#58825](https://github.com/ClickHouse/ClickHouse/issues/58825). [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash when `indexHint` function is used without arguments in the filters. [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)). +* Fixed URL and S3 engines losing the `headers` argument on server restart.
[#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix analyzer: an insertion from a SELECT with a subquery referencing the insertion table should process only the insertion block for all table expressions. Fixes [#58080](https://github.com/ClickHouse/ClickHouse/issues/58080). Follow-up to [#50857](https://github.com/ClickHouse/ClickHouse/issues/50857). [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fixed reading Parquet files from archives. [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)). +* Experimental feature of inverted indices: `ALTER TABLE DROP INDEX` for an inverted index now removes all inverted index files from the new part (issue [#59039](https://github.com/ClickHouse/ClickHouse/issues/59039)). [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)). +* Fix data race on collecting factories info for system.query_log. [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable retrying on `Too many redirects` error. Fixes [#58967](https://github.com/ClickHouse/ClickHouse/issues/58967). [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)). +* Fixed wrong aggregation results in mixed x86_64 and ARM clusters. [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fix a deadlock that can happen during the shutdown of the server due to metadata loading failure. [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)). +* The combination of LIMIT BY and LIMIT could produce an incorrect result in distributed queries (parallel replicas included). [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fixes a crash in `toString()` with a nullable timezone argument. Fixes [#59126](https://github.com/ClickHouse/ClickHouse/issues/59126). [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix abort in iceberg metadata on bad file paths. [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix architecture name in select of Rust target. [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)). +* Fix `Not-ready Set` for queries from `system.tables` with `table IN (subquery)` filter expression. Fixes [#59342](https://github.com/ClickHouse/ClickHouse/issues/59342). [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix lazy initialization in RabbitMQ that could lead to a logical error and an uninitialized state. [#59352](https://github.com/ClickHouse/ClickHouse/pull/59352) ([Kruglov Pavel](https://github.com/Avogar)). #### NO CL ENTRY diff --git a/docs/changelogs/v24.1.2.5-stable.md b/docs/changelogs/v24.1.2.5-stable.md index bac25c9b9ed..080e24da6f0 100644 --- a/docs/changelogs/v24.1.2.5-stable.md +++ b/docs/changelogs/v24.1.2.5-stable.md @@ -9,6 +9,6 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#59425](https://github.com/ClickHouse/ClickHouse/issues/59425): Fix translate() with FixedString input. Could lead to crashes as it'd return a String column (vs the expected FixedString). This issue was found by YohannJardin through the ClickHouse Bug Bounty Program. [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#59478](https://github.com/ClickHouse/ClickHouse/issues/59478): Fix stacktraces for binaries without debug symbols. [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v24.1.3.31-stable.md b/docs/changelogs/v24.1.3.31-stable.md index 046ca451fbc..ec73672c8d5 100644 --- a/docs/changelogs/v24.1.3.31-stable.md +++ b/docs/changelogs/v24.1.3.31-stable.md @@ -13,13 +13,13 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). -* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). -* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). -* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)). -* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). -* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#59726](https://github.com/ClickHouse/ClickHouse/issues/59726): Fix formatting of ALTER commands in case of column-specific settings. [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#59585](https://github.com/ClickHouse/ClickHouse/issues/59585): Make MAX use the same rules as permutation for complex types. [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#59579](https://github.com/ClickHouse/ClickHouse/issues/59579): Fix a corner case when passing the `update_insert_deduplication_token_in_dependent_materialized_views` setting. There is one corner case not covered due to the absence of tables in the path. [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). +* Backported in [#59647](https://github.com/ClickHouse/ClickHouse/issues/59647): Fix incorrect result of arrayElement / map[] on empty value. [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#59639](https://github.com/ClickHouse/ClickHouse/issues/59639): Fix crash in topK when merging empty states. [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#59696](https://github.com/ClickHouse/ClickHouse/issues/59696): Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor. [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#59764](https://github.com/ClickHouse/ClickHouse/issues/59764): Fix leftPad / rightPad function with FixedString input. [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). #### NO CL ENTRY diff --git a/docs/changelogs/v24.1.4.20-stable.md b/docs/changelogs/v24.1.4.20-stable.md index 8612a485f12..1baec2178b1 100644 --- a/docs/changelogs/v24.1.4.20-stable.md +++ b/docs/changelogs/v24.1.4.20-stable.md @@ -15,10 +15,10 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). -* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#59457](https://github.com/ClickHouse/ClickHouse/issues/59457): Keeper fix: fix digest calculation for nodes. [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#59682](https://github.com/ClickHouse/ClickHouse/issues/59682): Fix distributed table with a constant sharding key. [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#59842](https://github.com/ClickHouse/ClickHouse/issues/59842): Fix query start time on non initial queries. [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#59937](https://github.com/ClickHouse/ClickHouse/issues/59937): Fix parsing of partition expressions that are surrounded by parentheses, e.g.: `ALTER TABLE test DROP PARTITION ('2023-10-19')`. [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). #### NOT FOR CHANGELOG / INSIGNIFICANT diff --git a/docs/changelogs/v24.1.5.6-stable.md b/docs/changelogs/v24.1.5.6-stable.md index ce46c51e2f4..caf246fcab6 100644 --- a/docs/changelogs/v24.1.5.6-stable.md +++ b/docs/changelogs/v24.1.5.6-stable.md @@ -9,7 +9,7 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#59959](https://github.com/ClickHouse/ClickHouse/issues/59959): Fix crash during deserialization of aggregation function states that internally use `UniqExactSet`. Introduced https://github.com/ClickHouse/ClickHouse/pull/59009. 
[#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)). #### NOT FOR CHANGELOG / INSIGNIFICANT diff --git a/docs/changelogs/v24.1.7.18-stable.md b/docs/changelogs/v24.1.7.18-stable.md index 603a83a67be..3bc94538174 100644 --- a/docs/changelogs/v24.1.7.18-stable.md +++ b/docs/changelogs/v24.1.7.18-stable.md @@ -9,10 +9,10 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). -* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). -* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#61330](https://github.com/ClickHouse/ClickHouse/issues/61330): Fix deadlock in parallel parsing when lots of rows are skipped due to errors. [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#61008](https://github.com/ClickHouse/ClickHouse/issues/61008): Fix the issue of `max_query_size` for KQL compound operator like mv-expand. Related to [#59626](https://github.com/ClickHouse/ClickHouse/issues/59626). [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Backported in [#61019](https://github.com/ClickHouse/ClickHouse/issues/61019): Fix crash when `allow_experimental_analyzer` setting value is changed in the subqueries. [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#61293](https://github.com/ClickHouse/ClickHouse/issues/61293): Keeper: fix runtime reconfig for standalone binary. [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.1.8.22-stable.md b/docs/changelogs/v24.1.8.22-stable.md index f780de41c40..e615c60a942 100644 --- a/docs/changelogs/v24.1.8.22-stable.md +++ b/docs/changelogs/v24.1.8.22-stable.md @@ -9,12 +9,12 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). -* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bug when reading system.parts using UUID (issue 61220). [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). -* Fix client `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). 
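A minimal sketch of the parallel-parsing scenario behind the deadlock entry above; the file name, schema, and error budget are illustrative assumptions, not taken from the PR:

```sql
-- Many malformed rows are skipped via input_format_allow_errors_num while parallel
-- parsing is enabled; this combination could previously deadlock during import.
SELECT count()
FROM file('data.csv', 'CSV', 'a UInt64, b String')
SETTINGS input_format_parallel_parsing = 1, input_format_allow_errors_num = 100000;
```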
-* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). +* Backported in [#61451](https://github.com/ClickHouse/ClickHouse/issues/61451): Fix possible incorrect result of aggregate function `uniqExact`. [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#61844](https://github.com/ClickHouse/ClickHouse/issues/61844): Fixed possible wrong result of aggregation with nullable keys. [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#61746](https://github.com/ClickHouse/ClickHouse/issues/61746): Fix incorrect results when filtering `system.parts` or `system.parts_columns` using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). +* Backported in [#61696](https://github.com/ClickHouse/ClickHouse/issues/61696): Fix `clickhouse-client -s` argument, it was broken by defining it two times. [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#61576](https://github.com/ClickHouse/ClickHouse/issues/61576): Fix string search with constant start position which previously could lead to memory corruption. [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#61858](https://github.com/ClickHouse/ClickHouse/issues/61858): Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` when specifying incorrect UTF-8 sequence. Example: [#61714](https://github.com/ClickHouse/ClickHouse/issues/61714#issuecomment-2012768202). [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.2.1.2248-stable.md b/docs/changelogs/v24.2.1.2248-stable.md index 6113dd51ab1..edcd3da3852 100644 --- a/docs/changelogs/v24.2.1.2248-stable.md +++ b/docs/changelogs/v24.2.1.2248-stable.md @@ -60,7 +60,7 @@ sidebar_label: 2024 * Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)). * Implement auto-adjustment for asynchronous insert timeouts. The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)). * Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)). -* Add support for Date32 type in T64 codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)). +* Add support for Date32 type in T64 codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma (Mahone)](https://github.com/binmahone)). * Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). 
New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)). * Allow parallel and distributed processing for `S3Queue` table engine. For distributed processing use setting `s3queue_total_shards_num` (by default `1`). Setting `s3queue_processing_threads_num` previously was not allowed for Ordered processing mode, now it is allowed. Warning: settings `s3queue_processing_threads_num`(processing threads per each shard) and `s3queue_total_shards_num` for ordered mode change how metadata is stored (make the number of `max_processed_file` nodes equal to `s3queue_processing_threads_num * s3queue_total_shards_num`), so they must be the same for all shards and cannot be changed once at least one shard is created. [#59167](https://github.com/ClickHouse/ClickHouse/pull/59167) ([Kseniia Sumarokova](https://github.com/kssenii)). @@ -123,60 +123,60 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Non ready set in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix quantilesGK bug [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([李扬](https://github.com/taiyang-li)). -* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)). -* Fix bug with `intDiv` for decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). -* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). -* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). -* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). -* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). 
-* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). -* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)). -* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). -* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). -* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). -* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). -* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)). -* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). -* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)). -* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)). -* s3queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Fix crash in JSONColumnsWithMetadata format over http [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)). -* Do not rewrite sum() to count() if return value differs in analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)). -* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)). -* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)). -* Run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)). -* Fix scale conversion for DateTime64 [#60004](https://github.com/ClickHouse/ClickHouse/pull/60004) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). 
-* Fix INSERT into SQLite with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)). -* Fix several logical errors in arrayFold [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)). -* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). -* Fix possible exception from s3queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)). -* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix inconsistent formatting of queries [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). -* Allow casting of bools in string representation to to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix system.s3queue_log [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)). -* Fix actions execution during preliminary filtering (PK, partition pruning) [#60196](https://github.com/ClickHouse/ClickHouse/pull/60196) ([Azat Khuzhin](https://github.com/azat)). -* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Revert "Replace `ORDER BY ALL` by `ORDER BY *`" [#60248](https://github.com/ClickHouse/ClickHouse/pull/60248) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)). -* s3queue: fix bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)). -* Merging [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
-* Correctly check keys in s3Cluster [#60477](https://github.com/ClickHouse/ClickHouse/pull/60477) ([Antonio Andelic](https://github.com/antonio2368)). +* Support `IN (subquery)` in table TTL expression. Initially, it was allowed to create such a TTL expression, but any TTL merge would fail with a `Not-ready Set` error in the background. Now, TTL is correctly applied. The subquery is executed for every TTL merge, and its result is not cached or reused by other merges. Use such a configuration with special care, because subqueries in TTL may lead to high memory consumption and, possibly, a non-deterministic result of the TTL merge on different replicas (which is correctly handled by replication, however). [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix quantilesGK bug, closes [#57683](https://github.com/ClickHouse/ClickHouse/issues/57683). [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([李扬](https://github.com/taiyang-li)). +* Disable parallel replicas JOIN with CTE (not analyzer). [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)). +* Fixes a bug in function `intDiv` with decimal arguments. Fixes [#56414](https://github.com/ClickHouse/ClickHouse/issues/56414). [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix translate() with FixedString input. Could lead to crashes as it'd return a String column (vs the expected FixedString). This issue was found by YohannJardin through the ClickHouse Bug Bounty Program. [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). +* Keeper fix: fix digest calculation for nodes. [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix stacktraces for binaries without debug symbols. [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). +* Fix formatting of ALTER commands in case of column-specific settings. [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* `SELECT * FROM [...] ORDER BY ALL SETTINGS allow_experimental_analyzer = 1` now works. [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix possible uncaught exception during distributed query cancellation. Closes [#59169](https://github.com/ClickHouse/ClickHouse/issues/59169). [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). +* Make MAX use the same rules as permutation for complex types. [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a corner case when passing the `update_insert_deduplication_token_in_dependent_materialized_views` setting. There is one corner case not covered due to the absence of tables in the path. [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). +* Fix incorrect result of arrayElement / map[] on empty value. [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash in topK when merging empty states. [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)).
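A minimal sketch of the topK case from the entry directly above, where one of the merged aggregation states comes from an empty input; the query is illustrative only and assumes a recent ClickHouse version:

```sql
-- The first subquery produces an empty topK state (no rows match the filter);
-- merging it with a non-empty state could previously crash.
SELECT topKMerge(3)(state)
FROM
(
    SELECT topKState(3)(number) AS state FROM numbers(10) WHERE number > 100
    UNION ALL
    SELECT topKState(3)(number) AS state FROM numbers(10)
);
```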
+* Fix distributed table with a constant sharding key. [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix segmentation fault in KQL parser when the input query exceeds the `max_query_size`. Also re-enable the KQL dialect. Fixes [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036) and [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037). [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Fix error `Read beyond last offset` for `AsynchronousBoundedReadBuffer`. [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor. [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)). +* Fix query start time on non-initial queries. [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* Validate types of arguments for `minmax` skipping index. [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)). +* Fix leftPad / rightPad function with FixedString input. [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed an exception in function `countMatches` with non-const `FixedString` haystack arguments, e.g. `SELECT countMatches(materialize(toFixedString('foobarfoo', 9)), 'foo');`. [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)). +* RabbitMQ: fix having neither acked nor nacked messages. If an exception happens during the read-write phase, messages will be nacked. [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed queries that read a Parquet file over HTTP (url()/URL()) executing in one thread instead of max_threads. [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed an uninitialized value in S3Queue, which happened during an upgrade to a new version if the table had Ordered mode and resulted in an error "Existing table metadata in ZooKeeper differs in s3queue_processing_threads_num setting". [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix parsing of partition expressions that are surrounded by parentheses, e.g.: `ALTER TABLE test DROP PARTITION ('2023-10-19')`. [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix crash in JSONColumnsWithMetadata format over HTTP. Closes [#59853](https://github.com/ClickHouse/ClickHouse/issues/59853). [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not rewrite sum() to count() if the return value differs in analyzer. [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash during deserialization of aggregation function states that internally use `UniqExactSet`. Introduced in https://github.com/ClickHouse/ClickHouse/pull/59009. [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)).
+* ReplicatedMergeTree: fix invalid `metadata_version` node initialization in ZooKeeper during creation of a non-first replica. Closes [#54902](https://github.com/ClickHouse/ClickHouse/issues/54902). [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed data race on the cluster object between `StorageDistributed` and `Context::reloadClusterConfig()`. The former held a const reference to its member while the latter destroyed the object (in the process of replacing it with a new one). [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)). +* Fixes [#59989](https://github.com/ClickHouse/ClickHouse/issues/59989): runs init scripts when force-enabled or when no database exists, rather than the inverse. [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)). +* Fix scale conversion for DateTime64 values (for example, `DateTime64(6)` -> `DateTime64(3)`), e.g. when inserting into a table like `CREATE TABLE test (result DateTime64(3)) ENGINE = Memory`. [#60004](https://github.com/ClickHouse/ClickHouse/pull/60004) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix INSERT into SQLite with single quote (by properly escaping single quotes with a quote instead of backslash). [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)). +* Fix several logical errors in arrayFold. Fixes support for Nullable and LowCardinality. [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)). +* Fix optimize_uniq_to_count removing the column alias. [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible error while dropping an S3Queue table, like "no node shard0". [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix formatting of NOT with single literals. [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)). +* Use max_query_size from context when parsing changed settings in DDLWorker. Previously, with a large number of changed settings, DDLWorker could fail with a `Max query size exceeded` error and not process log entries. [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix inconsistent formatting of queries containing tables named `table`. Fix wrong formatting of queries with `UNION ALL`, `INTERSECT`, and `EXCEPT` when their structure wasn't linear. This closes [#52349](https://github.com/ClickHouse/ClickHouse/issues/52349). Fix wrong formatting of `SYSTEM` queries, including `SYSTEM ... DROP FILESYSTEM CACHE`, `SYSTEM ... REFRESH/START/STOP/CANCEL/TEST VIEW`, `SYSTEM ENABLE/DISABLE FAILPOINT`. Fix formatting of parameterized DDL queries. Fix the formatting of the `DESCRIBE FILESYSTEM CACHE` query. Fix incorrect formatting of the `SET param_...` (a query setting a parameter). Fix incorrect formatting of `CREATE INDEX` queries. Fix inconsistent formatting of `CREATE USER` and similar queries. Fix inconsistent formatting of `CREATE SETTINGS PROFILE`. Fix incorrect formatting of `ALTER ... MODIFY REFRESH`. Fix inconsistent formatting of window functions if frame offsets were expressions. Fix inconsistent formatting of `RESPECT NULLS` and `IGNORE NULLS` if they were used after a function that implements an operator (such as `plus`).
Fix idiotic formatting of `SYSTEM SYNC REPLICA ... LIGHTWEIGHT FROM ...`. Fix inconsistent formatting of invalid queries with `GROUP BY GROUPING SETS ... WITH ROLLUP/CUBE/TOTALS`. Fix inconsistent formatting of `GRANT CURRENT GRANTS`. Fix inconsistent formatting of `CREATE TABLE (... COLLATE)`. Additionally, I fixed the incorrect formatting of `EXPLAIN` in subqueries ([#60102](https://github.com/ClickHouse/ClickHouse/issues/60102)). Fixed incorrect formatting of lambda functions ([#60012](https://github.com/ClickHouse/ClickHouse/issues/60012)). Added a check so there is no way to miss these abominations in the future. [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Queries like `SELECT * FROM (EXPLAIN ...)` were formatted incorrectly. [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cosineDistance crash with Nullable. [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Boolean values in string representation now cast to true bools. E.g. this query previously threw an exception but now works: `SELECT true = 'true'`. [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix non-filled column `table_uuid` in `system.s3queue_log`. Added columns `database` and `table`. Renamed `table_uuid` to `uuid`. [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix arrayReduce with nullable aggregate function name. [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix actions execution during preliminary filtering (PK, partition pruning). [#60196](https://github.com/ClickHouse/ClickHouse/pull/60196) ([Azat Khuzhin](https://github.com/azat)). +* Hide sensitive info for `S3Queue` table engine. [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Restore the previous syntax `ORDER BY ALL` which has temporarily (for a few days) been replaced by ORDER BY *. [#60248](https://github.com/ClickHouse/ClickHouse/pull/60248) ([Robert Schulze](https://github.com/rschu1ze)). +* Fixed a minor bug that caused all http return codes to be 200 (success) instead of a relevant code on exception. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)). +* Fix bug in `S3Queue` table engine with ordered parallel mode. [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6. [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments. [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed a minor bug that prevented distributed table queries sent from either KQL or PRQL dialect clients to be executed on replicas. [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incomplete results with s3Cluster when multiple threads are used. 
[#60477](https://github.com/ClickHouse/ClickHouse/pull/60477) ([Antonio Andelic](https://github.com/antonio2368)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.2.2.71-stable.md b/docs/changelogs/v24.2.2.71-stable.md index b9aa5be626b..e17c22ab176 100644 --- a/docs/changelogs/v24.2.2.71-stable.md +++ b/docs/changelogs/v24.2.2.71-stable.md @@ -12,21 +12,21 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). -* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). -* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). -* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)). -* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). -* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). -* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). -* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)). -* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#60640](https://github.com/ClickHouse/ClickHouse/issues/60640): Fixed a bug in parallel optimization for queries with `FINAL`, which could give an incorrect result in rare cases. [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#61085](https://github.com/ClickHouse/ClickHouse/issues/61085): Avoid calculation of scalar subqueries for `CREATE TABLE`. 
Fixes [#59795](https://github.com/ClickHouse/ClickHouse/issues/59795) and [#59930](https://github.com/ClickHouse/ClickHouse/issues/59930). Attempt to re-implement https://github.com/ClickHouse/ClickHouse/pull/57855. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#61332](https://github.com/ClickHouse/ClickHouse/issues/61332): Fix deadlock in parallel parsing when lots of rows are skipped due to errors. [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#61010](https://github.com/ClickHouse/ClickHouse/issues/61010): Fix the issue of `max_query_size` for KQL compound operator like mv-expand. Related to [#59626](https://github.com/ClickHouse/ClickHouse/issues/59626). [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Backported in [#61002](https://github.com/ClickHouse/ClickHouse/issues/61002): Reduce the number of read rows from `system.numbers`. Fixes [#59418](https://github.com/ClickHouse/ClickHouse/issues/59418). [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). +* Backported in [#60629](https://github.com/ClickHouse/ClickHouse/issues/60629): Don't output number tips for date types. [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60793](https://github.com/ClickHouse/ClickHouse/issues/60793): Fix buffer overflow that can happen if the attacker asks the HTTP server to decompress data with a composition of codecs and size triggering numeric overflow. Fix buffer overflow that can happen inside codec NONE on wrong input data. This was submitted by TIANGONG research team through our [Bug Bounty program](https://github.com/ClickHouse/ClickHouse/issues/38986). [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#60785](https://github.com/ClickHouse/ClickHouse/issues/60785): Functions for SQL/JSON were able to read uninitialized memory. This closes [#60017](https://github.com/ClickHouse/ClickHouse/issues/60017). Found by Fuzzer. [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#60805](https://github.com/ClickHouse/ClickHouse/issues/60805): Do not set aws custom metadata `x-amz-meta-*` headers on UploadPart & CompleteMultipartUpload calls. [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Backported in [#60822](https://github.com/ClickHouse/ClickHouse/issues/60822): Fix crash in arrayEnumerateRanked. [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60843](https://github.com/ClickHouse/ClickHouse/issues/60843): Fix crash when using input() in INSERT SELECT JOIN. Closes [#60035](https://github.com/ClickHouse/ClickHouse/issues/60035). [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#60919](https://github.com/ClickHouse/ClickHouse/issues/60919): Fix crash when `allow_experimental_analyzer` setting value is changed in the subqueries. [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). 
+* Backported in [#60906](https://github.com/ClickHouse/ClickHouse/issues/60906): Avoid segfault if too many keys are skipped when reading from S3. [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#61307](https://github.com/ClickHouse/ClickHouse/issues/61307): Fix multiple bugs in groupArraySorted. [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#61295](https://github.com/ClickHouse/ClickHouse/issues/61295): Keeper: fix runtime reconfig for standalone binary. [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.2.3.70-stable.md b/docs/changelogs/v24.2.3.70-stable.md index cd88877e254..1a50355e0b9 100644 --- a/docs/changelogs/v24.2.3.70-stable.md +++ b/docs/changelogs/v24.2.3.70-stable.md @@ -15,28 +15,28 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). -* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). -* fix issue of actions dag split [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)). -* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)). -* Fix bug when reading system.parts using UUID (issue 61220). [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). -* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)). -* Fix client `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). -* Cancel merges before removing moved parts [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). -* Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). -* Crash in Engine Merge if Row Policy does not have expression [#61971](https://github.com/ClickHouse/ClickHouse/pull/61971) ([Ilya Golshtein](https://github.com/ilejn)). -* Fix data race on scalars in Context [#62305](https://github.com/ClickHouse/ClickHouse/pull/62305) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Try to fix segfault in Hive engine [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix memory leak in groupArraySorted [#62597](https://github.com/ClickHouse/ClickHouse/pull/62597) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix GCD codec [#62853](https://github.com/ClickHouse/ClickHouse/pull/62853) ([Nikita Taranov](https://github.com/nickitat)). -* Fix temporary data in cache incorrectly processing failure of cache key directory creation [#62925](https://github.com/ClickHouse/ClickHouse/pull/62925) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix incorrect judgement of of monotonicity of function abs [#63097](https://github.com/ClickHouse/ClickHouse/pull/63097) ([Duc Canh Le](https://github.com/canhld94)). -* Make sanity check of settings worse [#63119](https://github.com/ClickHouse/ClickHouse/pull/63119) ([Raúl Marín](https://github.com/Algunenano)). -* Set server name for SSL handshake in MongoDB engine [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)). -* Format SQL security option only in `CREATE VIEW` queries. [#63136](https://github.com/ClickHouse/ClickHouse/pull/63136) ([pufit](https://github.com/pufit)). +* Backported in [#61453](https://github.com/ClickHouse/ClickHouse/issues/61453): Fix possible incorrect result of aggregate function `uniqExact`. [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#61946](https://github.com/ClickHouse/ClickHouse/issues/61946): Fix the ATTACH query with the ON CLUSTER clause when the database does not exist on the initiator node. Closes [#55009](https://github.com/ClickHouse/ClickHouse/issues/55009). [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#61846](https://github.com/ClickHouse/ClickHouse/issues/61846): Fixed possible wrong result of aggregation with nullable keys. [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#61591](https://github.com/ClickHouse/ClickHouse/issues/61591): ActionsDAG::split can't make sure that "Execution of first then second parts on block is equivalent to execution of initial DAG.". [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#61648](https://github.com/ClickHouse/ClickHouse/issues/61648): Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings. [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#61748](https://github.com/ClickHouse/ClickHouse/issues/61748): Fix incorrect results when filtering `system.parts` or `system.parts_columns` using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). +* Backported in [#61963](https://github.com/ClickHouse/ClickHouse/issues/61963): Fix the `ALTER QUERY MODIFY SQL SECURITY` queries to override the table's DDL correctly. [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)). +* Backported in [#61699](https://github.com/ClickHouse/ClickHouse/issues/61699): Fix `clickhouse-client -s` argument, it was broken by defining it two times. [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Backported in [#61578](https://github.com/ClickHouse/ClickHouse/issues/61578): Fix string search with constant start position which previously could lead to memory corruption. [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#62531](https://github.com/ClickHouse/ClickHouse/issues/62531): Fix data race between `MOVE PARTITION` query and merges resulting in intersecting parts. [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#61860](https://github.com/ClickHouse/ClickHouse/issues/61860): Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` when specifying incorrect UTF-8 sequence. Example: [#61714](https://github.com/ClickHouse/ClickHouse/issues/61714#issuecomment-2012768202). [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). +* Backported in [#62242](https://github.com/ClickHouse/ClickHouse/issues/62242): Fix skipping escape sequence parsing errors during JSON data parsing while using `input_format_allow_errors_num/ratio` settings. [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#62218](https://github.com/ClickHouse/ClickHouse/issues/62218): Fix crash in Engine Merge if Row Policy does not have an expression. [#61971](https://github.com/ClickHouse/ClickHouse/pull/61971) ([Ilya Golshtein](https://github.com/ilejn)). +* Backported in [#62342](https://github.com/ClickHouse/ClickHouse/issues/62342): Fix data race on scalars in Context. [#62305](https://github.com/ClickHouse/ClickHouse/pull/62305) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#62677](https://github.com/ClickHouse/ClickHouse/issues/62677): Fix segmentation fault when using Hive table engine. Reference [#62154](https://github.com/ClickHouse/ClickHouse/issues/62154), [#62560](https://github.com/ClickHouse/ClickHouse/issues/62560). [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#62639](https://github.com/ClickHouse/ClickHouse/issues/62639): Fix memory leak in groupArraySorted. Fix [#62536](https://github.com/ClickHouse/ClickHouse/issues/62536). [#62597](https://github.com/ClickHouse/ClickHouse/pull/62597) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#63054](https://github.com/ClickHouse/ClickHouse/issues/63054): Fixed bug in GCD codec implementation that may lead to server crashes. [#62853](https://github.com/ClickHouse/ClickHouse/pull/62853) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#63030](https://github.com/ClickHouse/ClickHouse/issues/63030): Fix incorrect behaviour of temporary data in cache when creation of the cache key base directory fails with `no space left on device`. [#62925](https://github.com/ClickHouse/ClickHouse/pull/62925) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#63142](https://github.com/ClickHouse/ClickHouse/issues/63142): Fix incorrect judgement of monotonicity of function `abs`. [#63097](https://github.com/ClickHouse/ClickHouse/pull/63097) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#63183](https://github.com/ClickHouse/ClickHouse/issues/63183): Sanity check: Clamp values instead of throwing.
[#63119](https://github.com/ClickHouse/ClickHouse/pull/63119) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63176](https://github.com/ClickHouse/ClickHouse/issues/63176): Setting server_name might help with recently reported SSL handshake error when connecting to MongoDB Atlas: `Poco::Exception. Code: 1000, e.code() = 0, SSL Exception: error:10000438:SSL routines:OPENSSL_internal:TLSV1_ALERT_INTERNAL_ERROR`. [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#63191](https://github.com/ClickHouse/ClickHouse/issues/63191): Fix a bug when `SQL SECURITY` statement appears in all `CREATE` queries if the server setting `ignore_empty_sql_security_in_create_view_query=true` https://github.com/ClickHouse/ClickHouse/pull/63134. [#63136](https://github.com/ClickHouse/ClickHouse/pull/63136) ([pufit](https://github.com/pufit)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.3.1.2672-lts.md b/docs/changelogs/v24.3.1.2672-lts.md index e5d008680a8..a70a33971c2 100644 --- a/docs/changelogs/v24.3.1.2672-lts.md +++ b/docs/changelogs/v24.3.1.2672-lts.md @@ -20,7 +20,7 @@ sidebar_label: 2024 #### New Feature * Topk/topkweighed support mode, which return count of values and it's error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)). -* Add generate_series as a table function. This function generates table with an arithmetic progression with natural numbers. [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)). +* Add generate_series as a table function. This function generates table with an arithmetic progression with natural numbers. [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([Daniil Ivanik](https://github.com/divanik)). * Support reading and writing backups as tar archives. [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)). * Implemented support for S3Express buckets. [#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)). * Allow to attach parts from a different disk * attach partition from the table on other disks using copy instead of hard link (such as instant table) * attach partition using copy when the hard link fails even on the same disk. [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)). @@ -133,75 +133,75 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix function execution over const and LowCardinality with GROUP BY const for analyzer [#59986](https://github.com/ClickHouse/ClickHouse/pull/59986) ([Azat Khuzhin](https://github.com/azat)). -* Fix finished_mutations_to_keep=0 for MergeTree (as docs says 0 is to keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)). -* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). -* Azure Blob Storage : Fix issues endpoint and prefix [#60251](https://github.com/ClickHouse/ClickHouse/pull/60251) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* fix LRUResource Cache bug (Hive cache) [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)). 
-* Force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)). -* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)). -* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). -* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)). -* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). -* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)). -* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix inconsistent aggregate function states in mixed x86-64 / ARM clusters [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)). -* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)). -* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)). -* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)). -* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove wrong sanitize checking in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)). 
-* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)). -* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). -* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). -* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). -* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix possible stuck on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)). -* Fix async RESTORE with Replicated database [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)). -* fix csv format not support tuple [#60994](https://github.com/ClickHouse/ClickHouse/pull/60994) ([shuai.xu](https://github.com/shuai-xu)). -* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)). -* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)). -* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)). -* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)). -* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix crash in ObjectJson parsing array with nulls [#61364](https://github.com/ClickHouse/ClickHouse/pull/61364) ([vdimir](https://github.com/vdimir)). -* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)). 
-* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). -* fix issue of actions dag split [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)). -* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)). -* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)). -* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)). -* Fix bug when reading system.parts using UUID (issue 61220). [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). -* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)). -* Fix crash in window view [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix `repeat` with non native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix client `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Reset part level upon attach from disk on MergeTree [#61536](https://github.com/ClickHouse/ClickHouse/pull/61536) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)). -* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix addDays cause an error when used datetime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)). -* disallow LowCardinality input type for JSONExtract [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)). -* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Non-ready set for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Don't allow the same expression in ORDER BY with and without WITH FILL [#61667](https://github.com/ClickHouse/ClickHouse/pull/61667) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix columns after executing MODIFY QUERY for a materialized view with internal table [#61734](https://github.com/ClickHouse/ClickHouse/pull/61734) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). -* Fix RANGE frame is not supported for Nullable columns. 
[#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)). -* Revert "Revert "Fix bug when reading system.parts using UUID (issue 61220)."" [#61779](https://github.com/ClickHouse/ClickHouse/pull/61779) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix function execution over const and LowCardinality with GROUP BY const for analyzer. [#59986](https://github.com/ClickHouse/ClickHouse/pull/59986) ([Azat Khuzhin](https://github.com/azat)). +* Fix finished_mutations_to_keep=0 for MergeTree (as docs says 0 is to keep everything). [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a bug in parallel optimization for queries with `FINAL`, which could give an incorrect result in rare cases. [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). +* Updated to not include account_name in endpoint if flag `endpoint_contains_account_name` is set and fixed issue with empty container name. [#60251](https://github.com/ClickHouse/ClickHouse/pull/60251) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix LRUResource Cache implementation that can be triggered by incorrect component usage. Error can't be triggered with current ClickHouse usage. close [#60122](https://github.com/ClickHouse/ClickHouse/issues/60122). [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)). +* Force reanalysis of the query if parallel replicas isn't supported in a subquery. [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)). +* Fix usage of plain metadata type for new disks configuration option. [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike. [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)). +* Avoid calculation of scalar subqueries for `CREATE TABLE`. Fixes [#59795](https://github.com/ClickHouse/ClickHouse/issues/59795) and [#59930](https://github.com/ClickHouse/ClickHouse/issues/59930). Attempt to re-implement https://github.com/ClickHouse/ClickHouse/pull/57855. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors. [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix the issue of `max_query_size` for KQL compound operator like mv-expand. Related to [#59626](https://github.com/ClickHouse/ClickHouse/issues/59626). [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Keeper fix: add timeouts when waiting for commit logs. Keeper could get stuck if the log successfully gets replicated but never committed. [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)). +* Reduce the number of read rows from `system.numbers`. Fixes [#59418](https://github.com/ClickHouse/ClickHouse/issues/59418). [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). +* Don't output number tips for date types. 
[#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)). +* Fix unexpected result during reading from tables with virtual columns when filter contains non-deterministic functions. Closes [#61106](https://github.com/ClickHouse/ClickHouse/issues/61106). [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix logical error on bad compatibility setting value type. Closes [#60590](https://github.com/ClickHouse/ClickHouse/issues/60590). [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed potentially inconsistent aggregate function states in mixed x86-64 / ARM clusters. [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Isolates the ClickHouse binary from any panics in `prqlc`. [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)). +* Fixing bug where `intDiv` with decimal and date/datetime as arguments leads to crash. Closes [#60653](https://github.com/ClickHouse/ClickHouse/issues/60653). [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix bug when attempt to 'ALTER TABLE ... MODIFY QUERY' with CTE ends up with "Table [CTE] does not exist" exception (Code: 60). [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory - major user is `clickhouse-local`). [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)). +* Fix "Invalid storage definition in metadata file" for parameterized views. [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)). +* Fix buffer overflow that can happen if the attacker asks the HTTP server to decompress data with a composition of codecs and size triggering numeric overflow. Fix buffer overflow that can happen inside codec NONE on wrong input data. This was submitted by TIANGONG research team through our [Bug Bounty program](https://github.com/ClickHouse/ClickHouse/issues/38986). [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Functions for SQL/JSON were able to read uninitialized memory. This closes [#60017](https://github.com/ClickHouse/ClickHouse/issues/60017). Found by Fuzzer. [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove wrong sanitize checking in aggregate function quantileGK: `sampled_len` in `ApproxSampler` is not guaranteed to be less than `default_compress_threshold`. `default_compress_threshold` is a just soft limitation while executing `ApproxSampler::insert`. cc @Algunenano. This issue was reproduced in https://github.com/oap-project/gluten/pull/4829. [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)). +* Fix the issue causing undesired deduplication on insert-select queries passing a custom `insert_deduplication_token.` The change sets streams to 1 in those cases to prevent the issue from happening at the expense of ignoring `max_insert_threads > 1`. [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)). 
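For the `insert_deduplication_token` entry above, a minimal sketch of the affected query shape (the table names and the token value are assumptions for illustration, not from the changelog):

```sql
-- Illustrative only: with a custom deduplication token, the whole
-- INSERT ... SELECT must be treated as one unit; the fix forces a single
-- stream so that max_insert_threads cannot split it into several blocks
-- that would share the same token and deduplicate against each other.
INSERT INTO dst
SETTINGS insert_deduplication_token = 'batch-2024-03-01', max_insert_threads = 4
SELECT *
FROM src;
```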
+* Do not set aws custom metadata `x-amz-meta-*` headers on UploadPart & CompleteMultipartUpload calls. [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* One more fix for toStartOfInterval returning wrong result for interval smaller than second. [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix crash in arrayEnumerateRanked. [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN. Closes [#60035](https://github.com/ClickHouse/ClickHouse/issues/60035). [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash when `allow_experimental_analyzer` setting value is changed in the subqueries. [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Avoid segfault if too many keys are skipped when reading from S3. [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible hang on error while reloading a dictionary with `SHARDS`. [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)). +* Fix async RESTORE with Replicated database. [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix CSV format writing tuples in a wrong format that could not be read back. [#60994](https://github.com/ClickHouse/ClickHouse/pull/60994) ([shuai.xu](https://github.com/shuai-xu)). +* Fixed deadlock in async inserts to `Log` tables via native protocol. [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)). +* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary that could lead to nullptr dereference on bad column types in FunctionsConversion. Closes [#56661](https://github.com/ClickHouse/ClickHouse/issues/56661). [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix multiple bugs in groupArraySorted. [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)). +* Keeper: fix runtime reconfig for standalone binary. [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix usage of session_token in S3 engine. Fixes https://github.com/ClickHouse/ClickHouse/pull/57850#issuecomment-1966404710. [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible incorrect result of aggregate function `uniqExact`. [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bugs in show database. [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible `LOGICAL_ERROR` in case storage with `RabbitMQ` engine has unsupported `MATERIALIZED|ALIAS|DEFAULT` columns. [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)). +* Fix `CREATE OR REPLACE DICTIONARY` with `lazy_load` turned off. [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)).
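As context for the `CREATE OR REPLACE DICTIONARY` entry above, a minimal sketch of such a statement (the dictionary schema and source are made up; "lazy_load turned off" presumably refers to the `dictionaries_lazy_load` server setting):

```sql
-- Illustrative only: replacing an existing dictionary on a server that
-- loads dictionaries eagerly (lazy load disabled).
CREATE OR REPLACE DICTIONARY db.ids_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'ids_source'))
LAYOUT(FLAT())
LIFETIME(MIN 60 MAX 300);
```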
+* Fix possible crash in `Object('json')` data type parsing array with `null`s. [#61364](https://github.com/ClickHouse/ClickHouse/pull/61364) ([vdimir](https://github.com/vdimir)). +* Fix the ATTACH query with the ON CLUSTER clause when the database does not exist on the initiator node. Closes [#55009](https://github.com/ClickHouse/ClickHouse/issues/55009). [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixed possible wrong result of aggregation with nullable keys. [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)). +* ActionsDAG::split can't make sure that "Execution of first then second parts on block is equivalent to execution of initial DAG.". [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)). +* Fix finishing a failed RESTORE. [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)). +* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings. [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)). +* Fix deadlock during `restore database` execution if `restore_threads` was set to 1. [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)). +* Fix incorrect results when filtering `system.parts` or `system.parts_columns` using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)). +* Fix the `ALTER QUERY MODIFY SQL SECURITY` queries to override the table's DDL correctly. [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)). +* The experimental "window view" feature (it is disabled by default), which should not be used in production, could lead to a crash. Issue was identified by YohannJardin via Bugcrowd program. [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `repeat` with non-native integers (e.g. `UInt256`). [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `clickhouse-client -s` argument, it was broken by defining it two times. [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix too high part level reported in [#58558](https://github.com/ClickHouse/ClickHouse/issues/58558) by resetting MergeTree part levels upon attach from disk just like `ReplicatedMergeTree` [does](https://github.com/ClickHouse/ClickHouse/blob/9cd7e6155c7027baccd6dc5380d0813db94b03cc/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp#L838). [#61536](https://github.com/ClickHouse/ClickHouse/pull/61536) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in arrayPartialReverseSort. [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)). +* Fix string search with constant start position which previously could lead to memory corruption. [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix the issue where the function `addDays` (and similar functions) reports an error when the first parameter is `DateTime64`. 
[#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)). +* Disallow LowCardinality type for the column containing JSON input in the JSONExtract function. [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)). +* Add parts to `system.part_log` when created using async insert with deduplication. [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `Not-ready Set` error while reading from `system.parts` (with `IN subquery`). Was introduced in [#60510](https://github.com/ClickHouse/ClickHouse/issues/60510). [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow the same expression in ORDER BY with and without WITH FILL. Such invalid expression could lead to logical error `Invalid number of rows in Chunk`. [#61667](https://github.com/ClickHouse/ClickHouse/pull/61667) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed `Entry actual part isn't empty yet. This is a bug. (LOGICAL_ERROR)` that might happen in rare cases after executing `REPLACE PARTITION`, `MOVE PARTITION TO TABLE` or `ATTACH PARTITION FROM`. [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix columns after executing `ALTER TABLE MODIFY QUERY` for a materialized view with internal table. A materialized view must have the same columns as its internal table if any, however `MODIFY QUERY` could break that rule before this PR causing the materialized view to be inconsistent. [#61734](https://github.com/ClickHouse/ClickHouse/pull/61734) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` when specifying incorrect UTF-8 sequence. Example: [#61714](https://github.com/ClickHouse/ClickHouse/issues/61714#issuecomment-2012768202). [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)). +* Fix RANGE frame is not supported for Nullable columns. ``` SELECT number, sum(number) OVER (ORDER BY number ASC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) AS sum FROM values('number Nullable(Int8)', 1, 1, 2, 3, NULL). [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)). +* Fix incorrect results when filtering `system.parts` or `system.parts_columns` using UUID. [#61779](https://github.com/ClickHouse/ClickHouse/pull/61779) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.3.2.23-lts.md b/docs/changelogs/v24.3.2.23-lts.md index 4d59a1cedf6..d8adc63c8ac 100644 --- a/docs/changelogs/v24.3.2.23-lts.md +++ b/docs/changelogs/v24.3.2.23-lts.md @@ -9,9 +9,9 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix external table cannot parse data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)). 
-* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value" [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#62078](https://github.com/ClickHouse/ClickHouse/issues/62078): Fix logical error ''Unexpected return type from materialize. Expected Nullable. Got UInt8' while using group_by_use_nulls with analyzer and materialize/constant in grouping set. Closes [#61531](https://github.com/ClickHouse/ClickHouse/issues/61531). [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#62122](https://github.com/ClickHouse/ClickHouse/issues/62122): Fix external table cannot parse data type Bool. [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#62147](https://github.com/ClickHouse/ClickHouse/issues/62147): Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value". The feature is broken and can't be disabled individually. [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.3.3.102-lts.md b/docs/changelogs/v24.3.3.102-lts.md index dc89ac24208..1cdbde67031 100644 --- a/docs/changelogs/v24.3.3.102-lts.md +++ b/docs/changelogs/v24.3.3.102-lts.md @@ -17,36 +17,36 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Cancel merges before removing moved parts [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). -* Crash in Engine Merge if Row Policy does not have expression [#61971](https://github.com/ClickHouse/ClickHouse/pull/61971) ([Ilya Golshtein](https://github.com/ilejn)). -* ReadWriteBufferFromHTTP set right header host when redirected [#62068](https://github.com/ClickHouse/ClickHouse/pull/62068) ([Sema Checherinda](https://github.com/CheSema)). -* Analyzer: Fix query parameter resolution [#62186](https://github.com/ClickHouse/ClickHouse/pull/62186) ([Dmitry Novik](https://github.com/novikd)). -* Fixing NULL random seed for generateRandom with analyzer. [#62248](https://github.com/ClickHouse/ClickHouse/pull/62248) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix PartsSplitter [#62268](https://github.com/ClickHouse/ClickHouse/pull/62268) ([Nikita Taranov](https://github.com/nickitat)). -* Analyzer: Fix alias to parametrized view resolution [#62274](https://github.com/ClickHouse/ClickHouse/pull/62274) ([Dmitry Novik](https://github.com/novikd)). -* Analyzer: Fix name resolution from parent scopes [#62281](https://github.com/ClickHouse/ClickHouse/pull/62281) ([Dmitry Novik](https://github.com/novikd)). -* Fix argMax with nullable non native numeric column [#62285](https://github.com/ClickHouse/ClickHouse/pull/62285) ([Raúl Marín](https://github.com/Algunenano)). -* Fix data race on scalars in Context [#62305](https://github.com/ClickHouse/ClickHouse/pull/62305) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Fix analyzer with positional arguments in distributed query [#62362](https://github.com/ClickHouse/ClickHouse/pull/62362) ([flynn](https://github.com/ucasfl)). -* Fix filter pushdown from additional_table_filters in Merge engine in analyzer [#62398](https://github.com/ClickHouse/ClickHouse/pull/62398) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix GLOBAL IN table queries with analyzer. [#62409](https://github.com/ClickHouse/ClickHouse/pull/62409) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix scalar subquery in LIMIT [#62567](https://github.com/ClickHouse/ClickHouse/pull/62567) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Try to fix segfault in Hive engine [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix memory leak in groupArraySorted [#62597](https://github.com/ClickHouse/ClickHouse/pull/62597) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix argMin/argMax combinator state [#62708](https://github.com/ClickHouse/ClickHouse/pull/62708) ([Raúl Marín](https://github.com/Algunenano)). -* Fix temporary data in cache failing because of cache lock contention optimization [#62715](https://github.com/ClickHouse/ClickHouse/pull/62715) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix FINAL modifier is not respected in CTE with analyzer [#62811](https://github.com/ClickHouse/ClickHouse/pull/62811) ([Duc Canh Le](https://github.com/canhld94)). -* Fix crash in function `formatRow` with `JSON` format and HTTP interface [#62840](https://github.com/ClickHouse/ClickHouse/pull/62840) ([Anton Popov](https://github.com/CurtizJ)). -* Fix GCD codec [#62853](https://github.com/ClickHouse/ClickHouse/pull/62853) ([Nikita Taranov](https://github.com/nickitat)). -* Disable optimize_rewrite_aggregate_function_with_if for sum(nullable) [#62912](https://github.com/ClickHouse/ClickHouse/pull/62912) ([Raúl Marín](https://github.com/Algunenano)). -* Fix temporary data in cache incorrectly processing failure of cache key directory creation [#62925](https://github.com/ClickHouse/ClickHouse/pull/62925) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix optimize_rewrite_aggregate_function_with_if implicit cast [#62999](https://github.com/ClickHouse/ClickHouse/pull/62999) ([Raúl Marín](https://github.com/Algunenano)). -* Do not remove server constants from GROUP BY key for secondary query. [#63047](https://github.com/ClickHouse/ClickHouse/pull/63047) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix incorrect judgement of of monotonicity of function abs [#63097](https://github.com/ClickHouse/ClickHouse/pull/63097) ([Duc Canh Le](https://github.com/canhld94)). -* Set server name for SSL handshake in MongoDB engine [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)). -* Use user specified db instead of "config" for MongoDB wire protocol version check [#63126](https://github.com/ClickHouse/ClickHouse/pull/63126) ([Alexander Gololobov](https://github.com/davenger)). -* Format SQL security option only in `CREATE VIEW` queries. [#63136](https://github.com/ClickHouse/ClickHouse/pull/63136) ([pufit](https://github.com/pufit)). +* Backported in [#62533](https://github.com/ClickHouse/ClickHouse/issues/62533): Fix data race between `MOVE PARTITION` query and merges resulting in intersecting parts. 
[#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#62244](https://github.com/ClickHouse/ClickHouse/issues/62244): Fix skipping escape sequence parsing errors during JSON data parsing while using `input_format_allow_errors_num/ratio` settings. [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#62220](https://github.com/ClickHouse/ClickHouse/issues/62220): Fix crash in Engine Merge if Row Policy does not have an expression. [#61971](https://github.com/ClickHouse/ClickHouse/pull/61971) ([Ilya Golshtein](https://github.com/ilejn)). +* Backported in [#62234](https://github.com/ClickHouse/ClickHouse/issues/62234): ReadWriteBufferFromHTTP now sets the right `Host` header when redirected. [#62068](https://github.com/ClickHouse/ClickHouse/pull/62068) ([Sema Checherinda](https://github.com/CheSema)). +* Backported in [#62278](https://github.com/ClickHouse/ClickHouse/issues/62278): Fix query parameter resolution with `allow_experimental_analyzer` enabled. Closes [#62113](https://github.com/ClickHouse/ClickHouse/issues/62113). [#62186](https://github.com/ClickHouse/ClickHouse/pull/62186) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#62354](https://github.com/ClickHouse/ClickHouse/issues/62354): Fix `generateRandom` with `NULL` in the seed argument. Fixes [#62092](https://github.com/ClickHouse/ClickHouse/issues/62092). [#62248](https://github.com/ClickHouse/ClickHouse/pull/62248) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#62412](https://github.com/ClickHouse/ClickHouse/issues/62412): When some index columns are not loaded into memory for some parts of a *MergeTree table, queries with `FINAL` might produce wrong results. Now we explicitly choose only the common prefix of index columns for all parts to avoid this issue. [#62268](https://github.com/ClickHouse/ClickHouse/pull/62268) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#62733](https://github.com/ClickHouse/ClickHouse/issues/62733): Fix inability to address parametrized view in SELECT queries via aliases. [#62274](https://github.com/ClickHouse/ClickHouse/pull/62274) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#62407](https://github.com/ClickHouse/ClickHouse/issues/62407): Fix name resolution in case when identifier is resolved to an executed scalar subquery. [#62281](https://github.com/ClickHouse/ClickHouse/pull/62281) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#62331](https://github.com/ClickHouse/ClickHouse/issues/62331): Fix argMax with nullable non-native numeric column. [#62285](https://github.com/ClickHouse/ClickHouse/pull/62285) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#62344](https://github.com/ClickHouse/ClickHouse/issues/62344): Fix data race on scalars in Context. [#62305](https://github.com/ClickHouse/ClickHouse/pull/62305) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#62484](https://github.com/ClickHouse/ClickHouse/issues/62484): Resolve positional arguments only on the initiator node. Closes [#62289](https://github.com/ClickHouse/ClickHouse/issues/62289). [#62362](https://github.com/ClickHouse/ClickHouse/pull/62362) ([flynn](https://github.com/ucasfl)). +* Backported in [#62442](https://github.com/ClickHouse/ClickHouse/issues/62442): Fix filter pushdown from additional_table_filters in Merge engine in analyzer.
Closes [#62229](https://github.com/ClickHouse/ClickHouse/issues/62229). [#62398](https://github.com/ClickHouse/ClickHouse/pull/62398) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#62475](https://github.com/ClickHouse/ClickHouse/issues/62475): Fix `Unknown expression or table expression identifier` error for `GLOBAL IN table` queries (with new analyzer). Fixes [#62286](https://github.com/ClickHouse/ClickHouse/issues/62286). [#62409](https://github.com/ClickHouse/ClickHouse/pull/62409) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#62612](https://github.com/ClickHouse/ClickHouse/issues/62612): Fix an error `LIMIT expression must be constant` in queries with constant expression in `LIMIT`/`OFFSET` which contains scalar subquery. Fixes [#62294](https://github.com/ClickHouse/ClickHouse/issues/62294). [#62567](https://github.com/ClickHouse/ClickHouse/pull/62567) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#62679](https://github.com/ClickHouse/ClickHouse/issues/62679): Fix segmentation fault when using Hive table engine. Reference [#62154](https://github.com/ClickHouse/ClickHouse/issues/62154), [#62560](https://github.com/ClickHouse/ClickHouse/issues/62560). [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#62641](https://github.com/ClickHouse/ClickHouse/issues/62641): Fix memory leak in groupArraySorted. Fix [#62536](https://github.com/ClickHouse/ClickHouse/issues/62536). [#62597](https://github.com/ClickHouse/ClickHouse/pull/62597) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#62770](https://github.com/ClickHouse/ClickHouse/issues/62770): Fix argMin/argMax combinator state. [#62708](https://github.com/ClickHouse/ClickHouse/pull/62708) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#62750](https://github.com/ClickHouse/ClickHouse/issues/62750): Fix temporary data in cache failing because of a small value of setting `filesystem_cache_reserve_space_wait_lock_timeout_milliseconds`. Introduced a separate setting `temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds`. [#62715](https://github.com/ClickHouse/ClickHouse/pull/62715) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#62993](https://github.com/ClickHouse/ClickHouse/issues/62993): Fix an error when `FINAL` is not applied when specified in CTE (new analyzer). Fixes [#62779](https://github.com/ClickHouse/ClickHouse/issues/62779). [#62811](https://github.com/ClickHouse/ClickHouse/pull/62811) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#62859](https://github.com/ClickHouse/ClickHouse/issues/62859): Fixed crash in function `formatRow` with `JSON` format in queries executed via the HTTP interface. [#62840](https://github.com/ClickHouse/ClickHouse/pull/62840) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#63056](https://github.com/ClickHouse/ClickHouse/issues/63056): Fixed bug in GCD codec implementation that may lead to server crashes. [#62853](https://github.com/ClickHouse/ClickHouse/pull/62853) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#62960](https://github.com/ClickHouse/ClickHouse/issues/62960): Disable optimize_rewrite_aggregate_function_with_if for sum(nullable). [#62912](https://github.com/ClickHouse/ClickHouse/pull/62912) ([Raúl Marín](https://github.com/Algunenano)). 
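For the `optimize_rewrite_aggregate_function_with_if` entry above, a rough sketch of the query shape the setting targets; as the setting's name suggests, it rewrites an aggregate over `if(...)` into the corresponding `-If` combinator, and the fix disables that shortcut for `sum` over a Nullable argument (the query below is illustrative, not taken from the changelog):

```sql
-- Illustrative only: a candidate for rewriting into
-- sumIf(toNullable(number), number % 2 = 0).
SELECT sum(if(number % 2 = 0, toNullable(number), 0))
FROM numbers(10)
SETTINGS optimize_rewrite_aggregate_function_with_if = 1;
```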
+* Backported in [#63032](https://github.com/ClickHouse/ClickHouse/issues/63032): Fix incorrect behaviour of temporary data in cache when creation of the cache key base directory fails with `no space left on device`. [#62925](https://github.com/ClickHouse/ClickHouse/pull/62925) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#63148](https://github.com/ClickHouse/ClickHouse/issues/63148): Fix optimize_rewrite_aggregate_function_with_if implicit cast. [#62999](https://github.com/ClickHouse/ClickHouse/pull/62999) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63146](https://github.com/ClickHouse/ClickHouse/issues/63146): Fix `Not found column in block` error for distributed queries with server-side constants in `GROUP BY` key. Fixes [#62682](https://github.com/ClickHouse/ClickHouse/issues/62682). [#63047](https://github.com/ClickHouse/ClickHouse/pull/63047) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63144](https://github.com/ClickHouse/ClickHouse/issues/63144): Fix incorrect judgement of monotonicity of function `abs`. [#63097](https://github.com/ClickHouse/ClickHouse/pull/63097) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#63178](https://github.com/ClickHouse/ClickHouse/issues/63178): Setting server_name might help with recently reported SSL handshake error when connecting to MongoDB Atlas: `Poco::Exception. Code: 1000, e.code() = 0, SSL Exception: error:10000438:SSL routines:OPENSSL_internal:TLSV1_ALERT_INTERNAL_ERROR`. [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#63170](https://github.com/ClickHouse/ClickHouse/issues/63170): The wire protocol version check for MongoDB used to try accessing "config" database, but this can fail if the user doesn't have permissions for it. The fix is to use the database name provided by the user. [#63126](https://github.com/ClickHouse/ClickHouse/pull/63126) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#63193](https://github.com/ClickHouse/ClickHouse/issues/63193): Fix a bug when `SQL SECURITY` statement appears in all `CREATE` queries if the server setting `ignore_empty_sql_security_in_create_view_query=true` https://github.com/ClickHouse/ClickHouse/pull/63134. [#63136](https://github.com/ClickHouse/ClickHouse/pull/63136) ([pufit](https://github.com/pufit)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.4.1.2088-stable.md b/docs/changelogs/v24.4.1.2088-stable.md index b8d83f1a31f..06e704356d4 100644 --- a/docs/changelogs/v24.4.1.2088-stable.md +++ b/docs/changelogs/v24.4.1.2088-stable.md @@ -106,75 +106,75 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix parser error when using COUNT(*) with FILTER clause [#61357](https://github.com/ClickHouse/ClickHouse/pull/61357) ([Duc Canh Le](https://github.com/canhld94)). -* Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)). -* Cancel merges before removing moved parts [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Try to fix abort in arrow [#61720](https://github.com/ClickHouse/ClickHouse/pull/61720) ([Kruglov Pavel](https://github.com/Avogar)).
-* Search for convert_to_replicated flag at the correct path [#61769](https://github.com/ClickHouse/ClickHouse/pull/61769) ([Kirill](https://github.com/kirillgarbar)). -* Fix possible connections data-race for distributed_foreground_insert/distributed_background_insert_batch [#61867](https://github.com/ClickHouse/ClickHouse/pull/61867) ([Azat Khuzhin](https://github.com/azat)). -* Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix writing exception message in output format in HTTP when http_wait_end_of_query is used [#61951](https://github.com/ClickHouse/ClickHouse/pull/61951) ([Kruglov Pavel](https://github.com/Avogar)). -* Proper fix for LowCardinality together with JSONExtact functions [#61957](https://github.com/ClickHouse/ClickHouse/pull/61957) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Crash in Engine Merge if Row Policy does not have expression [#61971](https://github.com/ClickHouse/ClickHouse/pull/61971) ([Ilya Golshtein](https://github.com/ilejn)). -* Fix WriteBufferAzureBlobStorage destructor uncaught exception [#61988](https://github.com/ClickHouse/ClickHouse/pull/61988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix CREATE TABLE w/o columns definition for ReplicatedMergeTree [#62040](https://github.com/ClickHouse/ClickHouse/pull/62040) ([Azat Khuzhin](https://github.com/azat)). -* Fix optimize_skip_unused_shards_rewrite_in for composite sharding key [#62047](https://github.com/ClickHouse/ClickHouse/pull/62047) ([Azat Khuzhin](https://github.com/azat)). -* ReadWriteBufferFromHTTP set right header host when redirected [#62068](https://github.com/ClickHouse/ClickHouse/pull/62068) ([Sema Checherinda](https://github.com/CheSema)). -* Fix external table cannot parse data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)). -* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value" [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)). -* Add test for [#35215](https://github.com/ClickHouse/ClickHouse/issues/35215) [#62180](https://github.com/ClickHouse/ClickHouse/pull/62180) ([Raúl Marín](https://github.com/Algunenano)). -* Analyzer: Fix query parameter resolution [#62186](https://github.com/ClickHouse/ClickHouse/pull/62186) ([Dmitry Novik](https://github.com/novikd)). -* Fix restoring parts while readonly [#62207](https://github.com/ClickHouse/ClickHouse/pull/62207) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix crash in index definition containing sql udf [#62225](https://github.com/ClickHouse/ClickHouse/pull/62225) ([vdimir](https://github.com/vdimir)). -* Fixing NULL random seed for generateRandom with analyzer. [#62248](https://github.com/ClickHouse/ClickHouse/pull/62248) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Correctly handle const columns in DistinctTransfom [#62250](https://github.com/ClickHouse/ClickHouse/pull/62250) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix PartsSplitter [#62268](https://github.com/ClickHouse/ClickHouse/pull/62268) ([Nikita Taranov](https://github.com/nickitat)). -* Analyzer: Fix alias to parametrized view resolution [#62274](https://github.com/ClickHouse/ClickHouse/pull/62274) ([Dmitry Novik](https://github.com/novikd)). 
-* Analyzer: Fix name resolution from parent scopes [#62281](https://github.com/ClickHouse/ClickHouse/pull/62281) ([Dmitry Novik](https://github.com/novikd)). -* Fix argMax with nullable non native numeric column [#62285](https://github.com/ClickHouse/ClickHouse/pull/62285) ([Raúl Marín](https://github.com/Algunenano)). -* Fix BACKUP and RESTORE of a materialized view in Ordinary database [#62295](https://github.com/ClickHouse/ClickHouse/pull/62295) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix data race on scalars in Context [#62305](https://github.com/ClickHouse/ClickHouse/pull/62305) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix primary key in materialized view [#62319](https://github.com/ClickHouse/ClickHouse/pull/62319) ([Murat Khairulin](https://github.com/mxwell)). -* Do not build multithread insert pipeline for tables without support [#62333](https://github.com/ClickHouse/ClickHouse/pull/62333) ([vdimir](https://github.com/vdimir)). -* Fix analyzer with positional arguments in distributed query [#62362](https://github.com/ClickHouse/ClickHouse/pull/62362) ([flynn](https://github.com/ucasfl)). -* Fix filter pushdown from additional_table_filters in Merge engine in analyzer [#62398](https://github.com/ClickHouse/ClickHouse/pull/62398) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix GLOBAL IN table queries with analyzer. [#62409](https://github.com/ClickHouse/ClickHouse/pull/62409) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Respect settings truncate_on_insert/create_new_file_on_insert in s3/hdfs/azure engines during partitioned write [#62425](https://github.com/ClickHouse/ClickHouse/pull/62425) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix backup restore path for AzureBlobStorage [#62447](https://github.com/ClickHouse/ClickHouse/pull/62447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix SimpleSquashingChunksTransform [#62451](https://github.com/ClickHouse/ClickHouse/pull/62451) ([Nikita Taranov](https://github.com/nickitat)). -* Fix capture of nested lambda. [#62462](https://github.com/ClickHouse/ClickHouse/pull/62462) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix validation of special MergeTree columns [#62498](https://github.com/ClickHouse/ClickHouse/pull/62498) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Avoid crash when reading protobuf with recursive types [#62506](https://github.com/ClickHouse/ClickHouse/pull/62506) ([Raúl Marín](https://github.com/Algunenano)). -* Fix a bug moving one partition from one to itself [#62524](https://github.com/ClickHouse/ClickHouse/pull/62524) ([helifu](https://github.com/helifu)). -* Fix scalar subquery in LIMIT [#62567](https://github.com/ClickHouse/ClickHouse/pull/62567) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Try to fix segfault in Hive engine [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix memory leak in groupArraySorted [#62597](https://github.com/ClickHouse/ClickHouse/pull/62597) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix crash in largestTriangleThreeBuckets [#62646](https://github.com/ClickHouse/ClickHouse/pull/62646) ([Raúl Marín](https://github.com/Algunenano)). -* Fix tumble[Start,End] and hop[Start,End] for bigger resolutions [#62705](https://github.com/ClickHouse/ClickHouse/pull/62705) ([Jordi Villar](https://github.com/jrdi)). 
-* Fix argMin/argMax combinator state [#62708](https://github.com/ClickHouse/ClickHouse/pull/62708) ([Raúl Marín](https://github.com/Algunenano)). -* Fix temporary data in cache failing because of cache lock contention optimization [#62715](https://github.com/ClickHouse/ClickHouse/pull/62715) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix crash in function `mergeTreeIndex` [#62762](https://github.com/ClickHouse/ClickHouse/pull/62762) ([Anton Popov](https://github.com/CurtizJ)). -* fix: update: nested materialized columns: size check fixes [#62773](https://github.com/ClickHouse/ClickHouse/pull/62773) ([Eliot Hautefeuille](https://github.com/hileef)). -* Fix FINAL modifier is not respected in CTE with analyzer [#62811](https://github.com/ClickHouse/ClickHouse/pull/62811) ([Duc Canh Le](https://github.com/canhld94)). -* Fix crash in function `formatRow` with `JSON` format and HTTP interface [#62840](https://github.com/ClickHouse/ClickHouse/pull/62840) ([Anton Popov](https://github.com/CurtizJ)). -* Azure: fix building final url from endpoint object [#62850](https://github.com/ClickHouse/ClickHouse/pull/62850) ([Daniel Pozo Escalona](https://github.com/danipozo)). -* Fix GCD codec [#62853](https://github.com/ClickHouse/ClickHouse/pull/62853) ([Nikita Taranov](https://github.com/nickitat)). -* Fix LowCardinality(Nullable) key in hyperrectangle [#62866](https://github.com/ClickHouse/ClickHouse/pull/62866) ([Amos Bird](https://github.com/amosbird)). -* Fix fromUnixtimestamp in joda syntax while the input value beyond UInt32 [#62901](https://github.com/ClickHouse/ClickHouse/pull/62901) ([KevinyhZou](https://github.com/KevinyhZou)). -* Disable optimize_rewrite_aggregate_function_with_if for sum(nullable) [#62912](https://github.com/ClickHouse/ClickHouse/pull/62912) ([Raúl Marín](https://github.com/Algunenano)). -* Fix PREWHERE for StorageBuffer with different source table column types. [#62916](https://github.com/ClickHouse/ClickHouse/pull/62916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix temporary data in cache incorrectly processing failure of cache key directory creation [#62925](https://github.com/ClickHouse/ClickHouse/pull/62925) ([Kseniia Sumarokova](https://github.com/kssenii)). -* gRPC: fix crash on IPv6 peer connection [#62978](https://github.com/ClickHouse/ClickHouse/pull/62978) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Fix possible CHECKSUM_DOESNT_MATCH (and others) during replicated fetches [#62987](https://github.com/ClickHouse/ClickHouse/pull/62987) ([Azat Khuzhin](https://github.com/azat)). -* Fix terminate with uncaught exception in temporary data in cache [#62998](https://github.com/ClickHouse/ClickHouse/pull/62998) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix optimize_rewrite_aggregate_function_with_if implicit cast [#62999](https://github.com/ClickHouse/ClickHouse/pull/62999) ([Raúl Marín](https://github.com/Algunenano)). -* Fix unhandled exception in ~RestorerFromBackup [#63040](https://github.com/ClickHouse/ClickHouse/pull/63040) ([Vitaly Baranov](https://github.com/vitlibar)). -* Do not remove server constants from GROUP BY key for secondary query. [#63047](https://github.com/ClickHouse/ClickHouse/pull/63047) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix incorrect judgement of of monotonicity of function abs [#63097](https://github.com/ClickHouse/ClickHouse/pull/63097) ([Duc Canh Le](https://github.com/canhld94)). 
-* Make sanity check of settings worse [#63119](https://github.com/ClickHouse/ClickHouse/pull/63119) ([Raúl Marín](https://github.com/Algunenano)). -* Set server name for SSL handshake in MongoDB engine [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)). -* Use user specified db instead of "config" for MongoDB wire protocol version check [#63126](https://github.com/ClickHouse/ClickHouse/pull/63126) ([Alexander Gololobov](https://github.com/davenger)). -* Format SQL security option only in `CREATE VIEW` queries. [#63136](https://github.com/ClickHouse/ClickHouse/pull/63136) ([pufit](https://github.com/pufit)). +* Fix parser error when using COUNT(*) with FILTER clause. [#61357](https://github.com/ClickHouse/ClickHouse/pull/61357) ([Duc Canh Le](https://github.com/canhld94)). +* Fix logical error 'Unexpected return type from materialize. Expected Nullable. Got UInt8' while using group_by_use_nulls with analyzer and materialize/constant in grouping set. Closes [#61531](https://github.com/ClickHouse/ClickHouse/issues/61531). [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix data race between `MOVE PARTITION` query and merges resulting in intersecting parts. [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* TBD. [#61720](https://github.com/ClickHouse/ClickHouse/pull/61720) ([Kruglov Pavel](https://github.com/Avogar)). +* Search for MergeTree to ReplicatedMergeTree conversion flag at the correct location for tables with custom storage policy. [#61769](https://github.com/ClickHouse/ClickHouse/pull/61769) ([Kirill](https://github.com/kirillgarbar)). +* Fix possible connections data-race for distributed_foreground_insert/distributed_background_insert_batch that leads to crashes. [#61867](https://github.com/ClickHouse/ClickHouse/pull/61867) ([Azat Khuzhin](https://github.com/azat)). +* Fix skipping escape sequence parsing errors during JSON data parsing while using `input_format_allow_errors_num/ratio` settings. [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix writing exception message in output format in HTTP when http_wait_end_of_query is used. Closes [#55101](https://github.com/ClickHouse/ClickHouse/issues/55101). [#61951](https://github.com/ClickHouse/ClickHouse/pull/61951) ([Kruglov Pavel](https://github.com/Avogar)). +* This PR reverts https://github.com/ClickHouse/ClickHouse/pull/61617 and fixes the problem with usage of LowCardinality columns together with the JSONExtract function. Previously the user might receive either an incorrect result or a logical error. [#61957](https://github.com/ClickHouse/ClickHouse/pull/61957) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix a crash in Engine Merge if a Row Policy does not have an expression. [#61971](https://github.com/ClickHouse/ClickHouse/pull/61971) ([Ilya Golshtein](https://github.com/ilejn)). +* Implemented preFinalize, updated finalizeImpl & destructor of WriteBufferAzureBlobStorage to avoid an uncaught exception in the destructor. [#61988](https://github.com/ClickHouse/ClickHouse/pull/61988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix CREATE TABLE w/o columns definition for ReplicatedMergeTree (columns will be obtained from replica). [#62040](https://github.com/ClickHouse/ClickHouse/pull/62040) ([Azat Khuzhin](https://github.com/azat)).
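For context on the `input_format_allow_errors_num/ratio` fix above, a minimal, hedged sketch of how these error-tolerance settings are typically used when loading a row-based format such as JSONEachRow (the `events` table and the file name are hypothetical):

```sql
-- Tolerate a limited number of malformed rows instead of failing the whole INSERT
-- (run from clickhouse-client; table and file names are illustrative only).
SET input_format_allow_errors_num = 10;      -- skip at most 10 bad rows
SET input_format_allow_errors_ratio = 0.01;  -- or at most 1% of all rows
INSERT INTO events FROM INFILE 'events.jsonl' FORMAT JSONEachRow;
```

With the fix above, rows that fail on broken escape sequences are counted and skipped like other row-level parse errors instead of aborting the query.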
+* Fix optimize_skip_unused_shards_rewrite_in for composite sharding key (could lead to `NOT_FOUND_COLUMN_IN_BLOCK` and `TYPE_MISMATCH`). [#62047](https://github.com/ClickHouse/ClickHouse/pull/62047) ([Azat Khuzhin](https://github.com/azat)). +* ReadWriteBufferFromHTTP set right header host when redirected. [#62068](https://github.com/ClickHouse/ClickHouse/pull/62068) ([Sema Checherinda](https://github.com/CheSema)). +* Fix external table cannot parse data type Bool. [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)). +* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value". The feature is broken and can't be disabled individually. [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)). +* Fix override of MergeTree virtual columns. [#62180](https://github.com/ClickHouse/ClickHouse/pull/62180) ([Raúl Marín](https://github.com/Algunenano)). +* Fix query parameter resolution with `allow_experimental_analyzer` enabled. Closes [#62113](https://github.com/ClickHouse/ClickHouse/issues/62113). [#62186](https://github.com/ClickHouse/ClickHouse/pull/62186) ([Dmitry Novik](https://github.com/novikd)). +* This PR makes `RESTORE ON CLUSTER` wait for each `ReplicatedMergeTree` table to stop being readonly before attaching any restored parts to it. Earlier it didn't wait and it could try to attach some parts at nearly the same time as checking other replicas during the table's startup. In rare cases some parts could be not attached at all during `RESTORE ON CLUSTER` because of that issue. [#62207](https://github.com/ClickHouse/ClickHouse/pull/62207) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash on `CREATE TABLE` with `INDEX` containing SQL UDF in expression, close [#62134](https://github.com/ClickHouse/ClickHouse/issues/62134). [#62225](https://github.com/ClickHouse/ClickHouse/pull/62225) ([vdimir](https://github.com/vdimir)). +* Fix `generateRandom` with `NULL` in the seed argument. Fixes [#62092](https://github.com/ClickHouse/ClickHouse/issues/62092). [#62248](https://github.com/ClickHouse/ClickHouse/pull/62248) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix buffer overflow when `DISTINCT` is used with constant values. [#62250](https://github.com/ClickHouse/ClickHouse/pull/62250) ([Antonio Andelic](https://github.com/antonio2368)). +* When some index columns are not loaded into memory for some parts of a *MergeTree table, queries with `FINAL` might produce wrong results. Now we explicitly choose only the common prefix of index columns for all parts to avoid this issue. [#62268](https://github.com/ClickHouse/ClickHouse/pull/62268) ([Nikita Taranov](https://github.com/nickitat)). +* Fix inability to address parametrized view in SELECT queries via aliases. [#62274](https://github.com/ClickHouse/ClickHouse/pull/62274) ([Dmitry Novik](https://github.com/novikd)). +* Fix name resolution in case when identifier is resolved to an executed scalar subquery. [#62281](https://github.com/ClickHouse/ClickHouse/pull/62281) ([Dmitry Novik](https://github.com/novikd)). +* Fix argMax with nullable non native numeric column. [#62285](https://github.com/ClickHouse/ClickHouse/pull/62285) ([Raúl Marín](https://github.com/Algunenano)). +* Fix BACKUP and RESTORE of a materialized view in Ordinary database. 
[#62295](https://github.com/ClickHouse/ClickHouse/pull/62295) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix data race on scalars in Context. [#62305](https://github.com/ClickHouse/ClickHouse/pull/62305) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix displaying of materialized_view primary_key in system.tables. Previously it was shown empty even when a CREATE query included PRIMARY KEY. [#62319](https://github.com/ClickHouse/ClickHouse/pull/62319) ([Murat Khairulin](https://github.com/mxwell)). +* Do not build multithread insert pipeline for engines without `max_insert_threads` support. Fix the order of inserted rows in queries like `INSERT INTO FUNCTION file/s3(...) SELECT * FROM ORDER BY col`. [#62333](https://github.com/ClickHouse/ClickHouse/pull/62333) ([vdimir](https://github.com/vdimir)). +* Resolve positional arguments only on the initiator node. Closes [#62289](https://github.com/ClickHouse/ClickHouse/issues/62289). [#62362](https://github.com/ClickHouse/ClickHouse/pull/62362) ([flynn](https://github.com/ucasfl)). +* Fix filter pushdown from additional_table_filters in Merge engine in analyzer. Closes [#62229](https://github.com/ClickHouse/ClickHouse/issues/62229). [#62398](https://github.com/ClickHouse/ClickHouse/pull/62398) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `Unknown expression or table expression identifier` error for `GLOBAL IN table` queries (with new analyzer). Fixes [#62286](https://github.com/ClickHouse/ClickHouse/issues/62286). [#62409](https://github.com/ClickHouse/ClickHouse/pull/62409) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Respect settings truncate_on_insert/create_new_file_on_insert in s3/hdfs/azure engines during partitioned write. Closes [#61492](https://github.com/ClickHouse/ClickHouse/issues/61492). [#62425](https://github.com/ClickHouse/ClickHouse/pull/62425) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix backup restore path for AzureBlobStorage to include specified blob path. [#62447](https://github.com/ClickHouse/ClickHouse/pull/62447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed rare bug in `SimpleSquashingChunksTransform` that may lead to a loss of the last chunk of data in a stream. [#62451](https://github.com/ClickHouse/ClickHouse/pull/62451) ([Nikita Taranov](https://github.com/nickitat)). +* Fix excessive memory usage for queries with nested lambdas. Fixes [#62036](https://github.com/ClickHouse/ClickHouse/issues/62036). [#62462](https://github.com/ClickHouse/ClickHouse/pull/62462) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix validation of special columns (`ver`, `is_deleted`, `sign`) in MergeTree engines on table creation and alter queries. Fixes [#62463](https://github.com/ClickHouse/ClickHouse/issues/62463). [#62498](https://github.com/ClickHouse/ClickHouse/pull/62498) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Avoid crash when reading protobuf with recursive types. [#62506](https://github.com/ClickHouse/ClickHouse/pull/62506) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#62459](https://github.com/ClickHouse/ClickHouse/issues/62459). [#62524](https://github.com/ClickHouse/ClickHouse/pull/62524) ([helifu](https://github.com/helifu)). +* Fix an error `LIMIT expression must be constant` in queries with constant expression in `LIMIT`/`OFFSET` which contains scalar subquery. Fixes [#62294](https://github.com/ClickHouse/ClickHouse/issues/62294).
[#62567](https://github.com/ClickHouse/ClickHouse/pull/62567) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix segmentation fault when using Hive table engine. Reference [#62154](https://github.com/ClickHouse/ClickHouse/issues/62154), [#62560](https://github.com/ClickHouse/ClickHouse/issues/62560). [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix memory leak in groupArraySorted. Fix [#62536](https://github.com/ClickHouse/ClickHouse/issues/62536). [#62597](https://github.com/ClickHouse/ClickHouse/pull/62597) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash in largestTriangleThreeBuckets. [#62646](https://github.com/ClickHouse/ClickHouse/pull/62646) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `tumble[Start,End]` and `hop[Start,End]` functions for resolutions bigger than a day. [#62705](https://github.com/ClickHouse/ClickHouse/pull/62705) ([Jordi Villar](https://github.com/jrdi)). +* Fix argMin/argMax combinator state. [#62708](https://github.com/ClickHouse/ClickHouse/pull/62708) ([Raúl Marín](https://github.com/Algunenano)). +* Fix temporary data in cache failing because of a small value of setting `filesystem_cache_reserve_space_wait_lock_timeout_milliseconds`. Introduced a separate setting `temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds`. [#62715](https://github.com/ClickHouse/ClickHouse/pull/62715) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed crash in table function `mergeTreeIndex` after offloading some of the columns from suffix of primary key. [#62762](https://github.com/ClickHouse/ClickHouse/pull/62762) ([Anton Popov](https://github.com/CurtizJ)). +* Fix size checks when updating materialized nested columns (fixes [#62731](https://github.com/ClickHouse/ClickHouse/issues/62731)). [#62773](https://github.com/ClickHouse/ClickHouse/pull/62773) ([Eliot Hautefeuille](https://github.com/hileef)). +* Fix an error where `FINAL` is not applied when specified in a CTE (new analyzer). Fixes [#62779](https://github.com/ClickHouse/ClickHouse/issues/62779). [#62811](https://github.com/ClickHouse/ClickHouse/pull/62811) ([Duc Canh Le](https://github.com/canhld94)). +* Fixed crash in function `formatRow` with `JSON` format in queries executed via the HTTP interface. [#62840](https://github.com/ClickHouse/ClickHouse/pull/62840) ([Anton Popov](https://github.com/CurtizJ)). +* Fix failure to start when storage account URL has trailing slash. [#62850](https://github.com/ClickHouse/ClickHouse/pull/62850) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Fixed bug in GCD codec implementation that may lead to server crashes. [#62853](https://github.com/ClickHouse/ClickHouse/pull/62853) ([Nikita Taranov](https://github.com/nickitat)). +* Fix incorrect key analysis when LowCardinality(Nullable) keys appear in the middle of a hyperrectangle. This fixes [#62848](https://github.com/ClickHouse/ClickHouse/issues/62848). [#62866](https://github.com/ClickHouse/ClickHouse/pull/62866) ([Amos Bird](https://github.com/amosbird)). +* When the function `fromUnixTimestampInJodaSyntax` is used to convert an input `Int64` or `UInt64` value to `DateTime`, it sometimes returns a wrong result, because the input value may exceed the maximum value of the `UInt32` type and the function first converts the input value to `UInt32`, which leads to the wrong result.
For example, for a table `test_tbl(a Int64, b UInt64)` containing a row (`10262736196`, `10262736196`), converting with `fromUnixTimestampInJodaSyntax` produced a wrong result. [#62901](https://github.com/ClickHouse/ClickHouse/pull/62901) ([KevinyhZou](https://github.com/KevinyhZou)). +* Disable optimize_rewrite_aggregate_function_with_if for sum(nullable). [#62912](https://github.com/ClickHouse/ClickHouse/pull/62912) ([Raúl Marín](https://github.com/Algunenano)). +* Fix the `Unexpected return type` error for queries that read from `StorageBuffer` with `PREWHERE` when the source table has different types. Fixes [#62545](https://github.com/ClickHouse/ClickHouse/issues/62545). [#62916](https://github.com/ClickHouse/ClickHouse/pull/62916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect behaviour of temporary data in cache when creation of the cache key base directory fails with `no space left on device`. [#62925](https://github.com/ClickHouse/ClickHouse/pull/62925) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed server crash on IPv6 gRPC client connection. [#62978](https://github.com/ClickHouse/ClickHouse/pull/62978) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix possible CHECKSUM_DOESNT_MATCH (and others) during replicated fetches. [#62987](https://github.com/ClickHouse/ClickHouse/pull/62987) ([Azat Khuzhin](https://github.com/azat)). +* Fix terminate with uncaught exception in temporary data in cache. [#62998](https://github.com/ClickHouse/ClickHouse/pull/62998) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix optimize_rewrite_aggregate_function_with_if implicit cast. [#62999](https://github.com/ClickHouse/ClickHouse/pull/62999) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible crash after unsuccessful RESTORE. This PR fixes [#62985](https://github.com/ClickHouse/ClickHouse/issues/62985). [#63040](https://github.com/ClickHouse/ClickHouse/pull/63040) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `Not found column in block` error for distributed queries with server-side constants in `GROUP BY` key. Fixes [#62682](https://github.com/ClickHouse/ClickHouse/issues/62682). [#63047](https://github.com/ClickHouse/ClickHouse/pull/63047) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect judgement of monotonicity of function `abs`. [#63097](https://github.com/ClickHouse/ClickHouse/pull/63097) ([Duc Canh Le](https://github.com/canhld94)). +* Sanity check: Clamp values instead of throwing. [#63119](https://github.com/ClickHouse/ClickHouse/pull/63119) ([Raúl Marín](https://github.com/Algunenano)). +* Setting server_name might help with recently reported SSL handshake error when connecting to MongoDB Atlas: `Poco::Exception. Code: 1000, e.code() = 0, SSL Exception: error:10000438:SSL routines:OPENSSL_internal:TLSV1_ALERT_INTERNAL_ERROR`. [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)). +* The wire protocol version check for MongoDB used to try accessing the "config" database, but this can fail if the user doesn't have permissions for it. The fix is to use the database name provided by the user. [#63126](https://github.com/ClickHouse/ClickHouse/pull/63126) ([Alexander Gololobov](https://github.com/davenger)). +* Fix a bug where a `SQL SECURITY` statement appears in all `CREATE` queries if the server setting `ignore_empty_sql_security_in_create_view_query=true` is set, see https://github.com/ClickHouse/ClickHouse/pull/63134.
[#63136](https://github.com/ClickHouse/ClickHouse/pull/63136) ([pufit](https://github.com/pufit)). #### CI Fix or Improvement (changelog entry is not required) diff --git a/docs/changelogs/v24.5.1.1763-stable.md b/docs/changelogs/v24.5.1.1763-stable.md new file mode 100644 index 00000000000..384e0395c4d --- /dev/null +++ b/docs/changelogs/v24.5.1.1763-stable.md @@ -0,0 +1,366 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.1.1763-stable (647c154a94d) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1) + +#### Backward Incompatible Change +* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrading and re-create them after the upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)). +* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` is deprecated (because they are error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions=1`. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)). +* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Provide support for AzureBlobStorage function in ClickHouse server to use Azure Workload identity to authenticate against Azure blob storage. If `use_workload_identity` parameter is set in config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication. [#57881](https://github.com/ClickHouse/ClickHouse/pull/57881) ([Vinay Suryadevara](https://github.com/vinay92-ch)). +* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST files instead of relying on the RocksDB built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce `StorageEmbeddedRocksDB` table settings. [#59163](https://github.com/ClickHouse/ClickHouse/pull/59163) ([Duc Canh Le](https://github.com/canhld94)). +* User can now parse CRLF with TSV format using a setting `input_format_tsv_crlf_end_of_line`. Closes [#56257](https://github.com/ClickHouse/ClickHouse/issues/56257). [#59747](https://github.com/ClickHouse/ClickHouse/pull/59747) ([Shaun Struwig](https://github.com/Blargian)). +* Adds the Form Format to read/write a single record in the application/x-www-form-urlencoded format. [#60199](https://github.com/ClickHouse/ClickHouse/pull/60199) ([Shaun Struwig](https://github.com/Blargian)). +* Added possibility to compress in CROSS JOIN.
[#60459](https://github.com/ClickHouse/ClickHouse/pull/60459) ([p1rattttt](https://github.com/p1rattttt)). +* New setting `input_format_force_null_for_omitted_fields` that forces NULL values for omitted fields. [#60887](https://github.com/ClickHouse/ClickHouse/pull/60887) ([Constantine Peresypkin](https://github.com/pkit)). +* Support join with inequality conditions which involve columns from both the left and right table, e.g. `t1.y < t2.y`. To enable, `SET allow_experimental_join_condition = 1`. [#60920](https://github.com/ClickHouse/ClickHouse/pull/60920) ([lgbo](https://github.com/lgbo-ustc)). +* Earlier, our S3 storage and the s3 table function didn't support selecting from archive files. Now it is possible to iterate over files inside archives in S3. [#62259](https://github.com/ClickHouse/ClickHouse/pull/62259) ([Daniil Ivanik](https://github.com/divanik)). +* Support for conditional function `clamp`. [#62377](https://github.com/ClickHouse/ClickHouse/pull/62377) ([skyoct](https://github.com/skyoct)). +* Add npy output format. [#62430](https://github.com/ClickHouse/ClickHouse/pull/62430) ([豪肥肥](https://github.com/HowePa)). +* Added SQL functions `generateUUIDv7`, `generateUUIDv7ThreadMonotonic`, `generateUUIDv7NonMonotonic` (with different monotonicity/performance trade-offs) to generate version 7 UUIDs, aka timestamp-based UUIDs with a random component. Also added a new function `UUIDToNum` to extract bytes from a UUID and a new function `UUIDv7ToDateTime` to extract the timestamp component from a UUID version 7. [#62852](https://github.com/ClickHouse/ClickHouse/pull/62852) ([Alexey Petrunyaka](https://github.com/pet74alex)). +* Backported in [#64307](https://github.com/ClickHouse/ClickHouse/issues/64307): Implement Dynamic data type that allows to store values of any type inside it without knowing all of them in advance. Dynamic type is available under a setting `allow_experimental_dynamic_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#63058](https://github.com/ClickHouse/ClickHouse/pull/63058) ([Kruglov Pavel](https://github.com/Avogar)). +* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST files instead of relying on the RocksDB built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce StorageEmbeddedRocksDB table settings. [#63324](https://github.com/ClickHouse/ClickHouse/pull/63324) ([Duc Canh Le](https://github.com/canhld94)). +* Added `Raw` format as a synonym for `TSVRaw`. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)). +* Added possibility to do cross join in temporary file if size exceeds limits. [#63432](https://github.com/ClickHouse/ClickHouse/pull/63432) ([p1rattttt](https://github.com/p1rattttt)). +* On Linux and MacOS, if the program has STDOUT redirected to a file with a compression extension, use the corresponding compression method instead of nothing (making it behave similarly to `INTO OUTFILE`). [#63662](https://github.com/ClickHouse/ClickHouse/pull/63662) ([v01dXYZ](https://github.com/v01dXYZ)). +* Change warning on high number of attached tables to differentiate tables, views and dictionaries. [#64180](https://github.com/ClickHouse/ClickHouse/pull/64180) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). + +#### Performance Improvement +* Skip merging of newly created projection blocks during `INSERT`-s.
[#59405](https://github.com/ClickHouse/ClickHouse/pull/59405) ([Nikita Taranov](https://github.com/nickitat)). +* Process string functions XXXUTF8 'asciily' if input strings are all ASCII chars. Inspired by https://github.com/apache/doris/pull/29799. Overall speedup of 1.07x~1.62x. Notice that peak memory usage has decreased in some cases. [#61632](https://github.com/ClickHouse/ClickHouse/pull/61632) ([李扬](https://github.com/taiyang-li)). +* Improved performance of selection (`{}`) globs in StorageS3. [#62120](https://github.com/ClickHouse/ClickHouse/pull/62120) ([Andrey Zvonov](https://github.com/zvonand)). +* HostResolver keeps each IP address several times. If a remote host has several IPs and, for some reason (firewall rules, for example), access to some IPs is allowed while others are forbidden, then only the first record of the forbidden IPs was marked as failed, and on each try these IPs had a chance to be chosen (and fail again). Even with that fixed, the DNS cache is dropped every 120 seconds, and the IPs could be chosen again. [#62652](https://github.com/ClickHouse/ClickHouse/pull/62652) ([Anton Ivashkin](https://github.com/ianton-ru)). +* Add a new configuration `prefer_merge_sort_block_bytes` to control the memory usage and speed up sorting 2 times when merging when there are many columns. [#62904](https://github.com/ClickHouse/ClickHouse/pull/62904) ([LiuNeng](https://github.com/liuneng1994)). +* `clickhouse-local` will start faster. In previous versions, it was not deleting temporary directories by mistake. Now it will. This closes [#62941](https://github.com/ClickHouse/ClickHouse/issues/62941). [#63074](https://github.com/ClickHouse/ClickHouse/pull/63074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Micro-optimizations for the new analyzer. [#63429](https://github.com/ClickHouse/ClickHouse/pull/63429) ([Raúl Marín](https://github.com/Algunenano)). +* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63443](https://github.com/ClickHouse/ClickHouse/pull/63443) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63532](https://github.com/ClickHouse/ClickHouse/pull/63532) ([Raúl Marín](https://github.com/Algunenano)). +* Speed up indices of type `set` a little (around 1.5 times) by removing garbage. [#64098](https://github.com/ClickHouse/ClickHouse/pull/64098) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Maps can now have `Float32`, `Float64`, `Array(T)`, `Map(K,V)` and `Tuple(T1, T2, ...)` as keys. Closes [#54537](https://github.com/ClickHouse/ClickHouse/issues/54537). [#59318](https://github.com/ClickHouse/ClickHouse/pull/59318) ([李扬](https://github.com/taiyang-li)). +* Multiline strings with border preservation and column width change. [#59940](https://github.com/ClickHouse/ClickHouse/pull/59940) ([Volodyachan](https://github.com/Volodyachan)). +* Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix a crash in asynchronous stack unwinding (such as when using the sampling query profiler) while interpreting debug info. This closes [#60460](https://github.com/ClickHouse/ClickHouse/issues/60460).
[#60468](https://github.com/ClickHouse/ClickHouse/pull/60468) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Distinct messages for the S3 'no key' error for the disk and storage cases. [#61108](https://github.com/ClickHouse/ClickHouse/pull/61108) ([Sema Checherinda](https://github.com/CheSema)). +* Less contention in filesystem cache (part 4). Allow keeping the filesystem cache not filled to the limit by doing additional eviction in the background (controlled by `keep_free_space_size(elements)_ratio`). This releases pressure from space reservation for queries (on the `tryReserve` method). Also this is done in a lock-free way as much as possible, e.g. it should not block normal cache usage. [#61250](https://github.com/ClickHouse/ClickHouse/pull/61250) ([Kseniia Sumarokova](https://github.com/kssenii)). +* The progress bar will work for trivial queries with LIMIT from `system.zeros`, `system.zeros_mt` (it already works for `system.numbers` and `system.numbers_mt`), and the `generateRandom` table function. As a bonus, if the total number of records is greater than the `max_rows_to_read` limit, it will throw an exception earlier. This closes [#58183](https://github.com/ClickHouse/ClickHouse/issues/58183). [#61823](https://github.com/ClickHouse/ClickHouse/pull/61823) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* YAML Merge Key support. [#62685](https://github.com/ClickHouse/ClickHouse/pull/62685) ([Azat Khuzhin](https://github.com/azat)). +* Enhance error message when a non-deterministic function is used with a Replicated source. [#62896](https://github.com/ClickHouse/ClickHouse/pull/62896) ([Grégoire Pineau](https://github.com/lyrixx)). +* Fix interserver secret for Distributed over Distributed from `remote`. [#63013](https://github.com/ClickHouse/ClickHouse/pull/63013) ([Azat Khuzhin](https://github.com/azat)). +* Allow using `clickhouse-local` and its shortcuts `clickhouse` and `ch` with a query or queries file as a positional argument. Examples: `ch "SELECT 1"`, `ch --param_test Hello "SELECT {test:String}"`, `ch query.sql`. This closes [#62361](https://github.com/ClickHouse/ClickHouse/issues/62361). [#63081](https://github.com/ClickHouse/ClickHouse/pull/63081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support configuration substitutions from YAML files. [#63106](https://github.com/ClickHouse/ClickHouse/pull/63106) ([Eduard Karacharov](https://github.com/korowa)). +* Add TTL information in the `system.parts_columns` table. [#63200](https://github.com/ClickHouse/ClickHouse/pull/63200) ([litlig](https://github.com/litlig)). +* Keep previous data in terminal after picking from skim suggestions. [#63261](https://github.com/ClickHouse/ClickHouse/pull/63261) ([FlameFactory](https://github.com/FlameFactory)). +* Width of fields is now correctly calculated, ignoring ANSI escape sequences. [#63270](https://github.com/ClickHouse/ClickHouse/pull/63270) ([Shaun Struwig](https://github.com/Blargian)). +* Enable plain_rewritable metadata for local and Azure (azure_blob_storage) object storages. [#63365](https://github.com/ClickHouse/ClickHouse/pull/63365) ([Julia Kartseva](https://github.com/jkartseva)). +* Support English-style Unicode quotes, e.g. “Hello”, ‘world’. This is questionable in general but helpful when you type your query in a word processor, such as Google Docs. This closes [#58634](https://github.com/ClickHouse/ClickHouse/issues/58634). [#63381](https://github.com/ClickHouse/ClickHouse/pull/63381) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
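As a hedged illustration of the Unicode-quotes entry above, assuming that ‘single’ Unicode quotes are accepted wherever ordinary single quotes delimit string literals:

```sql
-- Assumed to parse the same as: SELECT 'Hello, world' AS greeting;
SELECT ‘Hello, world’ AS greeting;
```

This is mainly convenient when a query is pasted from a word processor that auto-replaces straight quotes with typographic ones.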
+* Allowed creating a MaterializedMySQL database without a connection to MySQL. [#63397](https://github.com/ClickHouse/ClickHouse/pull/63397) ([Kirill](https://github.com/kirillgarbar)). +* Remove copying data when writing to filesystem cache. [#63401](https://github.com/ClickHouse/ClickHouse/pull/63401) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update the usage of error code `NUMBER_OF_ARGUMENTS_DOESNT_MATCH` with more accurate error codes when appropriate. [#63406](https://github.com/ClickHouse/ClickHouse/pull/63406) ([Yohann Jardin](https://github.com/yohannj)). +* `os_user` and `client_hostname` are now correctly set up for queries for command line suggestions in clickhouse-client. This closes [#63430](https://github.com/ClickHouse/ClickHouse/issues/63430). [#63433](https://github.com/ClickHouse/ClickHouse/pull/63433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed tabulation from line numbering, correct handling of length when moving a line if the value has a tab, added tests. [#63493](https://github.com/ClickHouse/ClickHouse/pull/63493) ([Volodyachan](https://github.com/Volodyachan)). +* Add the `aggregate_function_group_array_has_limit_size` setting to support discarding data in some scenarios. [#63516](https://github.com/ClickHouse/ClickHouse/pull/63516) ([zhongyuankai](https://github.com/zhongyuankai)). +* Automatically mark a replica of Replicated database as lost and start recovery if some DDL task fails more than `max_retries_before_automatic_recovery` (100 by default) times in a row with the same error. Also, fixed a bug that could cause skipping DDL entries when an exception is thrown during an early stage of entry execution. [#63549](https://github.com/ClickHouse/ClickHouse/pull/63549) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Automatically correct `max_block_size=0` to the default value. [#63587](https://github.com/ClickHouse/ClickHouse/pull/63587) ([Antonio Andelic](https://github.com/antonio2368)). +* Account failed files in `s3queue_tracked_file_ttl_sec` and `s3queue_tracked_files_limit` for `StorageS3Queue`. [#63638](https://github.com/ClickHouse/ClickHouse/pull/63638) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. This is to address [#52086](https://github.com/ClickHouse/ClickHouse/issues/52086). [#63656](https://github.com/ClickHouse/ClickHouse/pull/63656) ([Zimu Li](https://github.com/woodlzm)). +* Enable truncate operation for object storage disks. [#63693](https://github.com/ClickHouse/ClickHouse/pull/63693) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* The loading of the keywords list is now dependent on the server revision and will be disabled for the old versions of ClickHouse server. CC @azat. [#63786](https://github.com/ClickHouse/ClickHouse/pull/63786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow trailing commas in the columns list in the INSERT query. For example, `INSERT INTO test (a, b, c, ) VALUES ...`. [#63803](https://github.com/ClickHouse/ClickHouse/pull/63803) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better exception messages for the `Regexp` format. [#63804](https://github.com/ClickHouse/ClickHouse/pull/63804) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow trailing commas in the `Values` format. For example, this query is allowed: `INSERT INTO test (a, b, c) VALUES (4, 5, 6,);`.
[#63810](https://github.com/ClickHouse/ClickHouse/pull/63810) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse disks now read the server setting to obtain the actual metadata format version. [#63831](https://github.com/ClickHouse/ClickHouse/pull/63831) ([Sema Checherinda](https://github.com/CheSema)). +* Disable pretty format restrictions (`output_format_pretty_max_rows`/`output_format_pretty_max_value_width`) when stdout is not a TTY. [#63942](https://github.com/ClickHouse/ClickHouse/pull/63942) ([Azat Khuzhin](https://github.com/azat)). +* Exception handling now works when ClickHouse is used inside AWS Lambda. Author: [Alexey Coolnev](https://github.com/acoolnev). [#64014](https://github.com/ClickHouse/ClickHouse/pull/64014) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Throw `CANNOT_DECOMPRESS` instead of `CORRUPTED_DATA` on invalid compressed data passed via HTTP. [#64036](https://github.com/ClickHouse/ClickHouse/pull/64036) ([vdimir](https://github.com/vdimir)). +* A tip for a single large number in Pretty formats now works for Nullable and LowCardinality. This closes [#61993](https://github.com/ClickHouse/ClickHouse/issues/61993). [#64084](https://github.com/ClickHouse/ClickHouse/pull/64084) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now backups with Azure Blob Storage will use multicopy. [#64116](https://github.com/ClickHouse/ClickHouse/pull/64116) ([alesapin](https://github.com/alesapin)). +* Add metrics, logs, and thread names around parts filtering with indices. [#64130](https://github.com/ClickHouse/ClickHouse/pull/64130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow using native copy for Azure even with different containers. [#64154](https://github.com/ClickHouse/ClickHouse/pull/64154) ([alesapin](https://github.com/alesapin)). +* Finally enable native copy for Azure. [#64182](https://github.com/ClickHouse/ClickHouse/pull/64182) ([alesapin](https://github.com/alesapin)). +* Ignore `allow_suspicious_primary_key` on `ATTACH` and verify on `ALTER`. [#64202](https://github.com/ClickHouse/ClickHouse/pull/64202) ([Azat Khuzhin](https://github.com/azat)). + +#### Build/Testing/Packaging Improvement +* ClickHouse is built with clang-18. A lot of new checks from clang-tidy-18 have been enabled. [#60469](https://github.com/ClickHouse/ClickHouse/pull/60469) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Re-enable broken s390x build in CI. [#63135](https://github.com/ClickHouse/ClickHouse/pull/63135) ([Harry Lee](https://github.com/HarryLeeIBM)). +* The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Information about every symbol in every translation unit will be collected in the CI database for every build in the CI. This closes [#63494](https://github.com/ClickHouse/ClickHouse/issues/63494). [#63495](https://github.com/ClickHouse/ClickHouse/pull/63495) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Experimentally support loongarch64 as a new platform for ClickHouse. [#63733](https://github.com/ClickHouse/ClickHouse/pull/63733) ([qiangxuhui](https://github.com/qiangxuhui)). +* Update Apache Datasketches library. It resolves [#63858](https://github.com/ClickHouse/ClickHouse/issues/63858).
[#63923](https://github.com/ClickHouse/ClickHouse/pull/63923) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable GRPC support for aarch64 linux while cross-compiling binary. [#64072](https://github.com/ClickHouse/ClickHouse/pull/64072) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix making backup when multiple shards are used. This PR fixes [#56566](https://github.com/ClickHouse/ClickHouse/issues/56566). [#57684](https://github.com/ClickHouse/ClickHouse/pull/57684) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix passing projections/indexes from CREATE query into inner table of MV. [#59183](https://github.com/ClickHouse/ClickHouse/pull/59183) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect merging of boundRatio states. [#60532](https://github.com/ClickHouse/ClickHouse/pull/60532) ([Tao Wang](https://github.com/wangtZJU)). +* Fix crash when using some functions with low-cardinality columns. [#61966](https://github.com/ClickHouse/ClickHouse/pull/61966) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix queries with FINAL giving a wrong result when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)). +* Improve the detection of cgroups v2 memory controller in unusual locations. This fixes a warning that the cgroup memory observer was disabled because no cgroups v1 or v2 current memory file could be found. [#62903](https://github.com/ClickHouse/ClickHouse/pull/62903) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix subsequent use of external tables in client. [#62964](https://github.com/ClickHouse/ClickHouse/pull/62964) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a bug which could lead to the server accepting connections before it is actually loaded. [#63181](https://github.com/ClickHouse/ClickHouse/pull/63181) ([alesapin](https://github.com/alesapin)). +* Fix intersecting parts when restarting after a drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)). +* Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)). +* Fix JOIN filter push down for filled JOIN. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix infinite loop while listing objects in Azure blob storage. [#63257](https://github.com/ClickHouse/ClickHouse/pull/63257) ([Julia Kartseva](https://github.com/jkartseva)). +* CROSS join can be executed with any value of the `join_algorithm` setting, close [#62431](https://github.com/ClickHouse/ClickHouse/issues/62431). [#63273](https://github.com/ClickHouse/ClickHouse/pull/63273) ([vdimir](https://github.com/vdimir)). +* Fixed a potential crash caused by a `no space left` error when temporary data in the cache is used. [#63346](https://github.com/ClickHouse/ClickHouse/pull/63346) ([vdimir](https://github.com/vdimir)). +* Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX.
Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)). +* Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix debug assert when using grouping WITH ROLLUP and LowCardinality types. [#63398](https://github.com/ClickHouse/ClickHouse/pull/63398) ([Raúl Marín](https://github.com/Algunenano)). +* Fix logical errors in queries with `GROUPING SETS` and `WHERE` and `group_by_use_nulls = true`, close [#60538](https://github.com/ClickHouse/ClickHouse/issues/60538). [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) ([vdimir](https://github.com/vdimir)). +* Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)). +* Insert QueryFinish on AsyncInsertFlush with no data. [#63483](https://github.com/ClickHouse/ClickHouse/pull/63483) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `system.query_log.used_dictionaries` logging. [#63487](https://github.com/ClickHouse/ClickHouse/pull/63487) ([Eduard Karacharov](https://github.com/korowa)). +* Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix rabbitmq heap-use-after-free found by clang-18, which can happen if an error is thrown from RabbitMQ during initialization of exchange and queues. [#63515](https://github.com/ClickHouse/ClickHouse/pull/63515) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash on exit with sentry enabled (due to OpenSSL being destroyed before sentry). [#63548](https://github.com/ClickHouse/ClickHouse/pull/63548) ([Azat Khuzhin](https://github.com/azat)). +* Fix support for Array and Map with Keyed hashing functions and materialized keys. [#63628](https://github.com/ClickHouse/ClickHouse/pull/63628) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fixed Parquet filter pushdown not working with Analyzer. [#63642](https://github.com/ClickHouse/ClickHouse/pull/63642) ([Michael Kolupaev](https://github.com/al13n321)). +* It is forbidden to convert MergeTree to replicated if the ZooKeeper path for this table already exists. [#63670](https://github.com/ClickHouse/ClickHouse/pull/63670) ([Kirill](https://github.com/kirillgarbar)). +* Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix rare case with missing data in the result of distributed query. [#63691](https://github.com/ClickHouse/ClickHouse/pull/63691) ([vdimir](https://github.com/vdimir)). +* Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer.
[#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)). +* Fix `flatten_nested` being broken with Replicated database. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `SIZES_OF_COLUMNS_DOESNT_MATCH` error for queries with `arrayJoin` function in `WHERE`. Fixes [#63653](https://github.com/ClickHouse/ClickHouse/issues/63653). [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)). +* `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a query with a duplicating cycling alias. Fixes [#63320](https://github.com/ClickHouse/ClickHouse/issues/63320). [#63791](https://github.com/ClickHouse/ClickHouse/pull/63791) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed performance degradation of parsing data formats in INSERT query. This closes [#62918](https://github.com/ClickHouse/ClickHouse/issues/62918). This partially reverts [#42284](https://github.com/ClickHouse/ClickHouse/issues/42284), which breaks the original design and introduces more problems. [#63801](https://github.com/ClickHouse/ClickHouse/pull/63801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add 'endpoint_subpath' S3 URI setting to allow plain_rewritable disks to share the same endpoint. [#63806](https://github.com/ClickHouse/ClickHouse/pull/63806) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix queries using parallel read buffer (e.g. with max_download_thread > 0) getting stuck when threads cannot be allocated. [#63814](https://github.com/ClickHouse/ClickHouse/pull/63814) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow JOIN filter push down to both streams if only a single equivalent column is used in the query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)). +* Remove the data from all disks after DROP with the Lazy database engines. Without these changes, orphaned data will remain on the disks. [#63848](https://github.com/ClickHouse/ClickHouse/pull/63848) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
+* Fixes in the `find_super_nodes` and `find_big_family` commands of keeper-client: do not fail on ZNONODE errors, find super nodes inside super nodes, and properly calculate subtree node count. [#63862](https://github.com/ClickHouse/ClickHouse/pull/63862) ([Alexander Gololobov](https://github.com/davenger)). +* Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `EXPLAIN CURRENT TRANSACTION` query. [#63926](https://github.com/ClickHouse/ClickHouse/pull/63926) ([Anton Popov](https://github.com/CurtizJ)). +* Fix analyzer: make the IN function with arbitrarily deep sub-selects in a materialized view use the insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Allow `ALTER TABLE .. MODIFY|RESET SETTING` and `ALTER TABLE .. MODIFY COMMENT` for plain_rewritable disk. [#63933](https://github.com/ClickHouse/ClickHouse/pull/63933) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix Recursive CTE with distributed queries. Closes [#63790](https://github.com/ClickHouse/ClickHouse/issues/63790). [#63939](https://github.com/ClickHouse/ClickHouse/pull/63939) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix resolution of the unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)). +* Fix the `Not found column` error for queries with `skip_unused_shards = 1`, `LIMIT BY`, and the new analyzer. Fixes [#63943](https://github.com/ClickHouse/ClickHouse/issues/63943). [#63983](https://github.com/ClickHouse/ClickHouse/pull/63983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* (Low-quality third-party Kusto Query Language). Resolve Client Abortion Issue When Using KQL Table Function in Interactive Mode. [#63992](https://github.com/ClickHouse/ClickHouse/pull/63992) ([Yong Wang](https://github.com/kashwy)). +* Backported in [#64356](https://github.com/ClickHouse/ClickHouse/issues/64356): Fix a `Cyclic aliases` error for cyclic aliases of different type (expression and function). Fixes [#63205](https://github.com/ClickHouse/ClickHouse/issues/63205). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not throw `Storage doesn't support FINAL` error for remote queries over non-MergeTree tables with `final = true` and new analyzer. Fixes [#63960](https://github.com/ClickHouse/ClickHouse/issues/63960). [#64037](https://github.com/ClickHouse/ClickHouse/pull/64037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add missing settings to recoverLostReplica.
[#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)). +* Fix unwind on SIGSEGV on aarch64 (due to small stack for signal). [#64058](https://github.com/ClickHouse/ClickHouse/pull/64058) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#64324](https://github.com/ClickHouse/ClickHouse/issues/64324): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Backported in [#64384](https://github.com/ClickHouse/ClickHouse/issues/64384): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix azure backup writing multipart blocks as 1mb (read buffer size) instead of max_upload_part_size. [#64117](https://github.com/ClickHouse/ClickHouse/pull/64117) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#64541](https://github.com/ClickHouse/ClickHouse/issues/64541): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64332](https://github.com/ClickHouse/ClickHouse/issues/64332): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#64692](https://github.com/ClickHouse/ClickHouse/issues/64692): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64411](https://github.com/ClickHouse/ClickHouse/issues/64411): Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. Fixes [#64172](https://github.com/ClickHouse/ClickHouse/issues/64172). [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64625](https://github.com/ClickHouse/ClickHouse/issues/64625): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64682](https://github.com/ClickHouse/ClickHouse/issues/64682): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Implement cumulative A Sync status. 
[#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add ability to run Azure tests in PR with label. [#63196](https://github.com/ClickHouse/ClickHouse/pull/63196) ([alesapin](https://github.com/alesapin)). +* Add azure run with msan. [#63238](https://github.com/ClickHouse/ClickHouse/pull/63238) ([alesapin](https://github.com/alesapin)). +* Improve cloud backport script. [#63282](https://github.com/ClickHouse/ClickHouse/pull/63282) ([Raúl Marín](https://github.com/Algunenano)). +* Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Extra constraints for stress and fuzzer tests. [#63470](https://github.com/ClickHouse/ClickHouse/pull/63470) ([Raúl Marín](https://github.com/Algunenano)). +* Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)). +* Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)). +* Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)). +* Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)). +* Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)). +* Include checks like `Stateless tests (asan, distributed cache, meta storage in keeper, s3 storage) [2/3]` in `Mergeable Check` and `A Sync`. [#63945](https://github.com/ClickHouse/ClickHouse/pull/63945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)). +* Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#64591](https://github.com/ClickHouse/ClickHouse/issues/64591): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Do not remove server constants from GROUP BY key for secondary query."'. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Introduce bulk loading to StorageEmbeddedRocksDB"'. [#63316](https://github.com/ClickHouse/ClickHouse/pull/63316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable'. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)). +* NO CL ENTRY: 'Revert "Revert "Do not remove server constants from GROUP BY key for secondary query.""'. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "Fix index analysis for `DateTime64`"'. [#63525](https://github.com/ClickHouse/ClickHouse/pull/63525) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Add `jwcrypto` to integration tests runner'. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* NO CL ENTRY: 'Follow-up for the `binary_symbols` table in CI'. [#63802](https://github.com/ClickHouse/ClickHouse/pull/63802) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'chore(ci-workers): remove reusable from tailscale key'. [#63999](https://github.com/ClickHouse/ClickHouse/pull/63999) ([Gabriel Martinez](https://github.com/GMartinez-Sisti)). +* NO CL ENTRY: 'Revert "Update gui.md - Add ch-ui to open-source available tools."'. [#64064](https://github.com/ClickHouse/ClickHouse/pull/64064) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Prevent stack overflow in Fuzzer and Stress test'. [#64082](https://github.com/ClickHouse/ClickHouse/pull/64082) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Prevent conversion to Replicated if zookeeper path already exists"'. [#64214](https://github.com/ClickHouse/ClickHouse/pull/64214) ([Sergei Trifonov](https://github.com/serxa)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove http_max_chunk_size setting (too internal) [#60852](https://github.com/ClickHouse/ClickHouse/pull/60852) ([Azat Khuzhin](https://github.com/azat)). +* Fix race in refreshable materialized views causing SELECT to fail sometimes [#60883](https://github.com/ClickHouse/ClickHouse/pull/60883) ([Michael Kolupaev](https://github.com/al13n321)). +* Parallel replicas: table check failover [#61935](https://github.com/ClickHouse/ClickHouse/pull/61935) ([Igor Nikonov](https://github.com/devcrafter)). +* Avoid crashing on column type mismatch in a few dozen places [#62087](https://github.com/ClickHouse/ClickHouse/pull/62087) ([Michael Kolupaev](https://github.com/al13n321)). 
+* Fix optimize_if_chain_to_multiif const NULL handling [#62104](https://github.com/ClickHouse/ClickHouse/pull/62104) ([Michael Kolupaev](https://github.com/al13n321)). +* Use intrusive lists for `ResourceRequest` instead of deque [#62165](https://github.com/ClickHouse/ClickHouse/pull/62165) ([Sergei Trifonov](https://github.com/serxa)). +* Analyzer: Fix validateAggregates for tables with different aliases [#62346](https://github.com/ClickHouse/ClickHouse/pull/62346) ([vdimir](https://github.com/vdimir)). +* Improve code and tests of `DROP` of multiple tables [#62359](https://github.com/ClickHouse/ClickHouse/pull/62359) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix exception message during writing to partitioned s3/hdfs/azure path with globs [#62423](https://github.com/ClickHouse/ClickHouse/pull/62423) ([Kruglov Pavel](https://github.com/Avogar)). +* Support UBSan on Clang-19 (master) [#62466](https://github.com/ClickHouse/ClickHouse/pull/62466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Save the stacktrace of thread waiting on failing AsyncLoader job [#62719](https://github.com/ClickHouse/ClickHouse/pull/62719) ([Sergei Trifonov](https://github.com/serxa)). +* group_by_use_nulls strikes back [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Analyzer: prefer column name to alias from array join [#62995](https://github.com/ClickHouse/ClickHouse/pull/62995) ([vdimir](https://github.com/vdimir)). +* CI: try separate the workflows file for GitHub's Merge Queue [#63123](https://github.com/ClickHouse/ClickHouse/pull/63123) ([Max K.](https://github.com/maxknv)). +* Try to fix coverage tests [#63130](https://github.com/ClickHouse/ClickHouse/pull/63130) ([Raúl Marín](https://github.com/Algunenano)). +* Fix azure backup flaky test [#63158](https://github.com/ClickHouse/ClickHouse/pull/63158) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Merging [#60920](https://github.com/ClickHouse/ClickHouse/issues/60920) [#63159](https://github.com/ClickHouse/ClickHouse/pull/63159) ([vdimir](https://github.com/vdimir)). +* QueryAnalysisPass improve QUALIFY validation [#63162](https://github.com/ClickHouse/ClickHouse/pull/63162) ([Maksim Kita](https://github.com/kitaisreal)). +* Add numpy tests for different endianness [#63189](https://github.com/ClickHouse/ClickHouse/pull/63189) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fallback action-runner to autoupdate when it's unable to start [#63195](https://github.com/ClickHouse/ClickHouse/pull/63195) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix possible endless loop while reading from azure [#63197](https://github.com/ClickHouse/ClickHouse/pull/63197) ([Anton Popov](https://github.com/CurtizJ)). +* Add information about materialized view security bug fix into the changelog [#63204](https://github.com/ClickHouse/ClickHouse/pull/63204) ([pufit](https://github.com/pufit)). +* Disable one query from 02994_sanity_check_settings [#63208](https://github.com/ClickHouse/ClickHouse/pull/63208) ([Raúl Marín](https://github.com/Algunenano)). +* Enable custom parquet encoder by default, attempt 2 [#63210](https://github.com/ClickHouse/ClickHouse/pull/63210) ([Michael Kolupaev](https://github.com/al13n321)). +* Update version after release [#63215](https://github.com/ClickHouse/ClickHouse/pull/63215) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Update version_date.tsv and changelogs after v24.4.1.2088-stable [#63217](https://github.com/ClickHouse/ClickHouse/pull/63217) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v24.3.3.102-lts [#63226](https://github.com/ClickHouse/ClickHouse/pull/63226) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v24.2.3.70-stable [#63227](https://github.com/ClickHouse/ClickHouse/pull/63227) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Return back [#61551](https://github.com/ClickHouse/ClickHouse/issues/61551) (More optimal loading of marks) [#63233](https://github.com/ClickHouse/ClickHouse/pull/63233) ([Anton Popov](https://github.com/CurtizJ)). +* Hide CI options under a spoiler [#63237](https://github.com/ClickHouse/ClickHouse/pull/63237) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add `FROM` keyword to `TRUNCATE ALL TABLES` [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Minor follow-up to a renaming PR [#63260](https://github.com/ClickHouse/ClickHouse/pull/63260) ([Robert Schulze](https://github.com/rschu1ze)). +* More checks for concurrently deleted files and dirs in system.remote_data_paths [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)). +* Fix SettingsChangesHistory.h for allow_experimental_join_condition [#63278](https://github.com/ClickHouse/ClickHouse/pull/63278) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v23.8.14.6-lts [#63285](https://github.com/ClickHouse/ClickHouse/pull/63285) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix azure flaky test [#63286](https://github.com/ClickHouse/ClickHouse/pull/63286) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix deadlock in `CacheDictionaryUpdateQueue` in case of exception in constructor [#63287](https://github.com/ClickHouse/ClickHouse/pull/63287) ([Nikita Taranov](https://github.com/nickitat)). +* DiskApp: fix 'list --recursive /' and crash on invalid arguments [#63296](https://github.com/ClickHouse/ClickHouse/pull/63296) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix terminate because of unhandled exception in `MergeTreeDeduplicationLog::shutdown` [#63298](https://github.com/ClickHouse/ClickHouse/pull/63298) ([Nikita Taranov](https://github.com/nickitat)). +* Move s3_plain_rewritable unit test to shell [#63317](https://github.com/ClickHouse/ClickHouse/pull/63317) ([Julia Kartseva](https://github.com/jkartseva)). +* Add tests for [#63264](https://github.com/ClickHouse/ClickHouse/issues/63264) [#63321](https://github.com/ClickHouse/ClickHouse/pull/63321) ([Raúl Marín](https://github.com/Algunenano)). +* Try fix segfault in `MergeTreeReadPoolBase::createTask` [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)). +* Update README.md [#63326](https://github.com/ClickHouse/ClickHouse/pull/63326) ([Tyler Hannan](https://github.com/tylerhannan)). +* Skip unaccessible table dirs in system.remote_data_paths [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)). +* Add test for [#56287](https://github.com/ClickHouse/ClickHouse/issues/56287) [#63340](https://github.com/ClickHouse/ClickHouse/pull/63340) ([Raúl Marín](https://github.com/Algunenano)). 
+* Update README.md [#63350](https://github.com/ClickHouse/ClickHouse/pull/63350) ([Tyler Hannan](https://github.com/tylerhannan)). +* Add test for [#48049](https://github.com/ClickHouse/ClickHouse/issues/48049) [#63351](https://github.com/ClickHouse/ClickHouse/pull/63351) ([Raúl Marín](https://github.com/Algunenano)). +* Add option `query_id_prefix` to `clickhouse-benchmark` [#63352](https://github.com/ClickHouse/ClickHouse/pull/63352) ([Anton Popov](https://github.com/CurtizJ)). +* Rollback azurite to working version [#63354](https://github.com/ClickHouse/ClickHouse/pull/63354) ([alesapin](https://github.com/alesapin)). +* Randomize setting `enable_block_offset_column` in stress tests [#63355](https://github.com/ClickHouse/ClickHouse/pull/63355) ([Anton Popov](https://github.com/CurtizJ)). +* Fix AST parsing of invalid type names [#63357](https://github.com/ClickHouse/ClickHouse/pull/63357) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix some 00002_log_and_exception_messages_formatting flakiness [#63358](https://github.com/ClickHouse/ClickHouse/pull/63358) ([Michael Kolupaev](https://github.com/al13n321)). +* Add a test for [#55655](https://github.com/ClickHouse/ClickHouse/issues/55655) [#63380](https://github.com/ClickHouse/ClickHouse/pull/63380) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `reportBrokenPart` [#63396](https://github.com/ClickHouse/ClickHouse/pull/63396) ([Antonio Andelic](https://github.com/antonio2368)). +* Workaround for `oklch()` inside canvas bug for firefox [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)). +* Add test for issue [#47862](https://github.com/ClickHouse/ClickHouse/issues/47862) [#63424](https://github.com/ClickHouse/ClickHouse/pull/63424) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix parsing of `CREATE INDEX` query [#63425](https://github.com/ClickHouse/ClickHouse/pull/63425) ([Anton Popov](https://github.com/CurtizJ)). +* We are using Shared Catalog in the CI Logs cluster [#63442](https://github.com/ClickHouse/ClickHouse/pull/63442) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix collection of coverage data in the CI Logs cluster [#63453](https://github.com/ClickHouse/ClickHouse/pull/63453) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test for rocksdb bulk sink [#63457](https://github.com/ClickHouse/ClickHouse/pull/63457) ([Duc Canh Le](https://github.com/canhld94)). +* io_uring: refactor get reader from context [#63475](https://github.com/ClickHouse/ClickHouse/pull/63475) ([Tomer Shafir](https://github.com/tomershafir)). +* Analyzer setting max_streams_to_max_threads_ratio overflow fix [#63478](https://github.com/ClickHouse/ClickHouse/pull/63478) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting for better rendering of multiline string for pretty format [#63479](https://github.com/ClickHouse/ClickHouse/pull/63479) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix logical error when reloading config with customly created web disk broken after [#56367](https://github.com/ClickHouse/ClickHouse/issues/56367) [#63484](https://github.com/ClickHouse/ClickHouse/pull/63484) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add test for [#49307](https://github.com/ClickHouse/ClickHouse/issues/49307) [#63486](https://github.com/ClickHouse/ClickHouse/pull/63486) ([Anton Popov](https://github.com/CurtizJ)). 
+* Remove leftovers of GCC support in cmake rules [#63488](https://github.com/ClickHouse/ClickHouse/pull/63488) ([Azat Khuzhin](https://github.com/azat)). +* Fix ProfileEventTimeIncrement code [#63489](https://github.com/ClickHouse/ClickHouse/pull/63489) ([Azat Khuzhin](https://github.com/azat)). +* MergeTreePrefetchedReadPool: Print parent name when logging projection parts [#63522](https://github.com/ClickHouse/ClickHouse/pull/63522) ([Raúl Marín](https://github.com/Algunenano)). +* Correctly stop `asyncCopy` tasks in all cases [#63523](https://github.com/ClickHouse/ClickHouse/pull/63523) ([Antonio Andelic](https://github.com/antonio2368)). +* Almost everything should work on AArch64 (Part of [#58061](https://github.com/ClickHouse/ClickHouse/issues/58061)) [#63527](https://github.com/ClickHouse/ClickHouse/pull/63527) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update randomization of `old_parts_lifetime` [#63530](https://github.com/ClickHouse/ClickHouse/pull/63530) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update 02240_system_filesystem_cache_table.sh [#63531](https://github.com/ClickHouse/ClickHouse/pull/63531) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in `DistributedSink` [#63538](https://github.com/ClickHouse/ClickHouse/pull/63538) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix azure tests run on master [#63540](https://github.com/ClickHouse/ClickHouse/pull/63540) ([alesapin](https://github.com/alesapin)). +* Find a proper commit for cumulative `A Sync` status [#63543](https://github.com/ClickHouse/ClickHouse/pull/63543) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add `no-s3-storage` tag to local_plain_rewritable ut [#63546](https://github.com/ClickHouse/ClickHouse/pull/63546) ([Julia Kartseva](https://github.com/jkartseva)). +* Go back to upstream lz4 submodule [#63574](https://github.com/ClickHouse/ClickHouse/pull/63574) ([Raúl Marín](https://github.com/Algunenano)). +* Fix logical error in ColumnTuple::tryInsert() [#63583](https://github.com/ClickHouse/ClickHouse/pull/63583) ([Michael Kolupaev](https://github.com/al13n321)). +* harmonize sumMap error messages on ILLEGAL_TYPE_OF_ARGUMENT [#63619](https://github.com/ClickHouse/ClickHouse/pull/63619) ([Yohann Jardin](https://github.com/yohannj)). +* Update README.md [#63631](https://github.com/ClickHouse/ClickHouse/pull/63631) ([Tyler Hannan](https://github.com/tylerhannan)). +* Ignore global profiler if system.trace_log is not enabled and fix really disable it for keeper standalone build [#63632](https://github.com/ClickHouse/ClickHouse/pull/63632) ([Azat Khuzhin](https://github.com/azat)). +* Fixes for 00002_log_and_exception_messages_formatting [#63634](https://github.com/ClickHouse/ClickHouse/pull/63634) ([Azat Khuzhin](https://github.com/azat)). +* Fix tests flakiness due to long SYSTEM FLUSH LOGS (explicitly specify old_parts_lifetime) [#63639](https://github.com/ClickHouse/ClickHouse/pull/63639) ([Azat Khuzhin](https://github.com/azat)). +* Update clickhouse-test help section [#63663](https://github.com/ClickHouse/ClickHouse/pull/63663) ([Ali](https://github.com/xogoodnow)). +* Fix bad test `02950_part_log_bytes_uncompressed` [#63672](https://github.com/ClickHouse/ClickHouse/pull/63672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove leftovers of `optimize_monotonous_functions_in_order_by` [#63674](https://github.com/ClickHouse/ClickHouse/pull/63674) ([Nikita Taranov](https://github.com/nickitat)). 
+* tests: attempt to fix 02340_parts_refcnt_mergetree flakiness [#63684](https://github.com/ClickHouse/ClickHouse/pull/63684) ([Azat Khuzhin](https://github.com/azat)). +* Parallel replicas: simple cleanup [#63685](https://github.com/ClickHouse/ClickHouse/pull/63685) ([Igor Nikonov](https://github.com/devcrafter)). +* Cancel S3 reads properly when parallel reads are used [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)). +* Explain map insertion order [#63690](https://github.com/ClickHouse/ClickHouse/pull/63690) ([Mark Needham](https://github.com/mneedham)). +* selectRangesToRead() simple cleanup [#63692](https://github.com/ClickHouse/ClickHouse/pull/63692) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzed analyzer_join_with_constant query [#63702](https://github.com/ClickHouse/ClickHouse/pull/63702) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add missing explicit instantiations of ColumnUnique [#63718](https://github.com/ClickHouse/ClickHouse/pull/63718) ([Raúl Marín](https://github.com/Algunenano)). +* Better asserts in ColumnString.h [#63719](https://github.com/ClickHouse/ClickHouse/pull/63719) ([Raúl Marín](https://github.com/Algunenano)). +* Don't randomize some settings in 02941_variant_type_* tests to avoid timeouts [#63721](https://github.com/ClickHouse/ClickHouse/pull/63721) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix flaky 03145_non_loaded_projection_backup.sh [#63728](https://github.com/ClickHouse/ClickHouse/pull/63728) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Userspace page cache: don't collect stats if cache is unused [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix insignificant UBSAN error in QueryAnalyzer::replaceNodesWithPositionalArguments() [#63734](https://github.com/ClickHouse/ClickHouse/pull/63734) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix a bug in resolving matcher inside lambda inside ARRAY JOIN [#63744](https://github.com/ClickHouse/ClickHouse/pull/63744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove unused CaresPTRResolver::cancel_requests method [#63754](https://github.com/ClickHouse/ClickHouse/pull/63754) ([Arthur Passos](https://github.com/arthurpassos)). +* Do not hide disk name [#63756](https://github.com/ClickHouse/ClickHouse/pull/63756) ([Kseniia Sumarokova](https://github.com/kssenii)). +* CI: remove Cancel and Debug workflows as redundant [#63757](https://github.com/ClickHouse/ClickHouse/pull/63757) ([Max K.](https://github.com/maxknv)). +* Security Policy: Add notification process [#63773](https://github.com/ClickHouse/ClickHouse/pull/63773) ([Leticia Webb](https://github.com/leticiawebb)). +* Fix typo [#63774](https://github.com/ClickHouse/ClickHouse/pull/63774) ([Anton Popov](https://github.com/CurtizJ)). +* Fix fuzzer when only explicit faults are used [#63775](https://github.com/ClickHouse/ClickHouse/pull/63775) ([Raúl Marín](https://github.com/Algunenano)). +* Settings typo [#63782](https://github.com/ClickHouse/ClickHouse/pull/63782) ([Rory Crispin](https://github.com/RoryCrispin)). +* Changed the previous value of `output_format_pretty_preserve_border_for_multiline_string` setting [#63783](https://github.com/ClickHouse/ClickHouse/pull/63783) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). 
+* fix antlr insertStmt for issue 63657 [#63811](https://github.com/ClickHouse/ClickHouse/pull/63811) ([GG Bond](https://github.com/zzyReal666)). +* Fix race in `ReplicatedMergeTreeLogEntryData` [#63816](https://github.com/ClickHouse/ClickHouse/pull/63816) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow allocation during job destructor in `ThreadPool` [#63829](https://github.com/ClickHouse/ClickHouse/pull/63829) ([Antonio Andelic](https://github.com/antonio2368)). +* io_uring: add basic io_uring clickhouse perf test [#63835](https://github.com/ClickHouse/ClickHouse/pull/63835) ([Tomer Shafir](https://github.com/tomershafir)). +* fix typo [#63838](https://github.com/ClickHouse/ClickHouse/pull/63838) ([Alexander Gololobov](https://github.com/davenger)). +* Remove unnecessary logging statements in MergeJoinTransform.cpp [#63860](https://github.com/ClickHouse/ClickHouse/pull/63860) ([vdimir](https://github.com/vdimir)). +* CI: disable ARM integration test cases with libunwind crash [#63867](https://github.com/ClickHouse/ClickHouse/pull/63867) ([Max K.](https://github.com/maxknv)). +* Fix some settings values in 02455_one_row_from_csv_memory_usage test to make it less flaky [#63874](https://github.com/ClickHouse/ClickHouse/pull/63874) ([Kruglov Pavel](https://github.com/Avogar)). +* Randomise `allow_experimental_parallel_reading_from_replicas` in stress tests [#63899](https://github.com/ClickHouse/ClickHouse/pull/63899) ([Nikita Taranov](https://github.com/nickitat)). +* Fix logs test for binary data by converting it to a valid UTF8 string. [#63909](https://github.com/ClickHouse/ClickHouse/pull/63909) ([Alexey Katsman](https://github.com/alexkats)). +* More sanity checks for parallel replicas [#63910](https://github.com/ClickHouse/ClickHouse/pull/63910) ([Nikita Taranov](https://github.com/nickitat)). +* Insignificant libunwind build fixes [#63946](https://github.com/ClickHouse/ClickHouse/pull/63946) ([Azat Khuzhin](https://github.com/azat)). +* Revert multiline pretty changes due to performance problems [#63947](https://github.com/ClickHouse/ClickHouse/pull/63947) ([Raúl Marín](https://github.com/Algunenano)). +* Some usability improvements for c++expr script [#63948](https://github.com/ClickHouse/ClickHouse/pull/63948) ([Azat Khuzhin](https://github.com/azat)). +* CI: aarch64: disable arm integration tests with kerberaized kafka [#63961](https://github.com/ClickHouse/ClickHouse/pull/63961) ([Max K.](https://github.com/maxknv)). +* Slightly better setting `force_optimize_projection_name` [#63997](https://github.com/ClickHouse/ClickHouse/pull/63997) ([Anton Popov](https://github.com/CurtizJ)). +* Better script to collect symbols statistics [#64013](https://github.com/ClickHouse/ClickHouse/pull/64013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a typo in Analyzer [#64022](https://github.com/ClickHouse/ClickHouse/pull/64022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix libbcrypt for FreeBSD build [#64023](https://github.com/ClickHouse/ClickHouse/pull/64023) ([Azat Khuzhin](https://github.com/azat)). +* Fix searching for libclang_rt.builtins.*.a on FreeBSD [#64051](https://github.com/ClickHouse/ClickHouse/pull/64051) ([Azat Khuzhin](https://github.com/azat)). +* Fix waiting for mutations with retriable errors [#64063](https://github.com/ClickHouse/ClickHouse/pull/64063) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* harmonize h3PointDist* error messages [#64080](https://github.com/ClickHouse/ClickHouse/pull/64080) ([Yohann Jardin](https://github.com/yohannj)). +* This log message is better in Trace [#64081](https://github.com/ClickHouse/ClickHouse/pull/64081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* tests: fix expected error for 03036_reading_s3_archives (fixes CI) [#64089](https://github.com/ClickHouse/ClickHouse/pull/64089) ([Azat Khuzhin](https://github.com/azat)). +* Fix sanitizers [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)). +* Update llvm/clang to 18.1.6 [#64091](https://github.com/ClickHouse/ClickHouse/pull/64091) ([Azat Khuzhin](https://github.com/azat)). +* CI: mergeable check redesign [#64093](https://github.com/ClickHouse/ClickHouse/pull/64093) ([Max K.](https://github.com/maxknv)). +* Move `isAllASCII` from UTFHelper to StringUtils [#64108](https://github.com/ClickHouse/ClickHouse/pull/64108) ([Robert Schulze](https://github.com/rschu1ze)). +* Clean up .clang-tidy after transition to Clang 18 [#64111](https://github.com/ClickHouse/ClickHouse/pull/64111) ([Robert Schulze](https://github.com/rschu1ze)). +* Ignore exception when checking for cgroupsv2 [#64118](https://github.com/ClickHouse/ClickHouse/pull/64118) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix UBSan error in negative positional arguments [#64127](https://github.com/ClickHouse/ClickHouse/pull/64127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Syncing code [#64135](https://github.com/ClickHouse/ClickHouse/pull/64135) ([Antonio Andelic](https://github.com/antonio2368)). +* Losen build resource limits for unusual architectures [#64152](https://github.com/ClickHouse/ClickHouse/pull/64152) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix clang tidy [#64179](https://github.com/ClickHouse/ClickHouse/pull/64179) ([Han Fei](https://github.com/hanfei1991)). +* Fix global query profiler [#64187](https://github.com/ClickHouse/ClickHouse/pull/64187) ([Azat Khuzhin](https://github.com/azat)). +* CI: cancel running PR wf after adding to MQ [#64188](https://github.com/ClickHouse/ClickHouse/pull/64188) ([Max K.](https://github.com/maxknv)). +* Add debug logging to EmbeddedRocksDBBulkSink [#64203](https://github.com/ClickHouse/ClickHouse/pull/64203) ([vdimir](https://github.com/vdimir)). +* Fix special builds (due to excessive resource usage - memory/CPU) [#64204](https://github.com/ClickHouse/ClickHouse/pull/64204) ([Azat Khuzhin](https://github.com/azat)). +* Add gh to style-check dockerfile [#64227](https://github.com/ClickHouse/ClickHouse/pull/64227) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Followup for [#63691](https://github.com/ClickHouse/ClickHouse/issues/63691) [#64285](https://github.com/ClickHouse/ClickHouse/pull/64285) ([vdimir](https://github.com/vdimir)). +* Rename allow_deprecated_functions to allow_deprecated_error_prone_win… [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)). +* Update description for settings `cross_join_min_rows_to_compress` and `cross_join_min_bytes_to_compress` [#64360](https://github.com/ClickHouse/ClickHouse/pull/64360) ([Nikita Fomichev](https://github.com/fm4v)). +* Rename aggregate_function_group_array_has_limit_size [#64362](https://github.com/ClickHouse/ClickHouse/pull/64362) ([Raúl Marín](https://github.com/Algunenano)). 
+* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)). +* Clean settings in 02943_variant_read_subcolumns test [#64437](https://github.com/ClickHouse/ClickHouse/pull/64437) ([Kruglov Pavel](https://github.com/Avogar)). +* CI: Critical bugfix category in PR template [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/en/development/build-cross-loongarch.md b/docs/en/development/build-cross-loongarch.md new file mode 100644 index 00000000000..9ffe97d3da7 --- /dev/null +++ b/docs/en/development/build-cross-loongarch.md @@ -0,0 +1,32 @@ +--- +slug: /en/development/build-cross-loongarch +sidebar_position: 70 +title: How to Build ClickHouse on Linux for LoongArch64 Architecture +sidebar_label: Build on Linux for LoongArch64 +--- + +As of writing (2024/03/15), building for loongarch is considered to be highly experimental. Not all features can be enabled. + +This is for the case when you have a Linux machine and want to use it to build the `clickhouse` binary that will run on another Linux machine with LoongArch64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. + +The cross-build for LoongArch64 is based on the [Build instructions](../development/build.md), follow them first. + +## Install Clang-18 + +Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` + +## Build ClickHouse {#build-clickhouse} + + +The llvm version required for building must be greater than or equal to 18.1.0. ``` bash cd ClickHouse mkdir build-loongarch64 CC=clang-18 CXX=clang++-18 cmake . -Bbuild-loongarch64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-loongarch64.cmake ninja -C build-loongarch64 ``` + +The resulting binary will run only on Linux with the LoongArch64 CPU architecture. diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index eddf24448c1..66c6e2c6912 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -13,14 +13,14 @@ The cross-build for macOS is based on the [Build instructions](../development/bu The following sections provide a walk-through for building ClickHouse for `x86_64` macOS. If you’re targeting ARM architecture, simply substitute all occurrences of `x86_64` with `aarch64`. For example, replace `x86_64-apple-darwin` with `aarch64-apple-darwin` throughout the steps. -## Install Clang-17 +## Install clang-18 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like: ``` bash sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-17 main" >> /etc/apt/sources.list -sudo apt-get install clang-17 +sudo apt-get install clang-18 ``` ## Install Cross-Compilation Toolset {#install-cross-compilation-toolset} @@ -59,7 +59,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11 cd ClickHouse mkdir build-darwin cd build-darwin -CC=clang-17 CXX=clang++-17 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. +CC=clang-18 CXX=clang++-18 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. ninja ``` diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index 9ee5346f258..759d97823e2 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-17 CXX=clang++-17 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF +CC=clang-18 CXX=clang++-18 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md index 3c1667350e1..4c111a76d82 100644 --- a/docs/en/development/build-cross-s390x.md +++ b/docs/en/development/build-cross-s390x.md @@ -5,22 +5,13 @@ title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux) sidebar_label: Build on Linux for s390x (zLinux) --- -As of writing (2023/3/10) building for s390x considered to be experimental. Not all features can be enabled, has broken features and is currently under active development. +At the time of writing (2024 May), support for the s390x platform is considered experimental, i.e. some features are disabled or broken on s390x. +## Building ClickHouse for s390x -## Building - -s390x has two OpenSSL-related build options. -- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.) -- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake -```bash --DENABLE_OPENSSL_DYNAMIC=0 -``` - -:::note -s390x builds are temporarily disabled in CI. -::: - +s390x has two OpenSSL-related build options: +- By default, OpenSSL is built on s390x as a shared library. This is different from all other platforms, where OpenSSL is built as a static library.
+- To build OpenSSL as a static library regardless, pass `-DENABLE_OPENSSL_DYNAMIC=0` to CMake. These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04. @@ -31,11 +22,16 @@ apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-g ``` If you wish to cross compile rust code install the rust cross compile target for s390x: + ```bash rustup target add s390x-unknown-linux-gnu ``` +The s390x build uses the mold linker, download it from https://github.com/rui314/mold/releases/download/v2.0.0/mold-2.0.0-x86_64-linux.tar.gz +and place it into your `$PATH`. + To build for s390x: + ```bash cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake .. ninja diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 6aad31ae3b5..227a4d62484 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -109,7 +109,7 @@ The build requires the following components: - Git (used to checkout the sources, not needed for the build) - CMake 3.20 or newer -- Compiler: clang-17 or newer +- Compiler: clang-18 or newer - Linker: lld-17 or newer - Ninja - Yasm diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 46a30f56f11..c348eb5ca07 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -71,7 +71,7 @@ If it fails, fix the style errors following the [code style guide](style.md). ```sh mkdir -p /tmp/test_output # running all checks -docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test +python3 tests/ci/style_check.py --no-push # run specified check script (e.g.: ./check-mypy) docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy @@ -153,7 +153,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t ### Report Details -- **Compiler**: `clang-17`, optionally with the name of a target platform +- **Compiler**: `clang-18`, optionally with the name of a target platform - **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). - **Status**: `success` or `fail` @@ -177,7 +177,7 @@ Performs static analysis and code style checks using `clang-tidy`. 
The report is There is a convenience `packager` script that runs the clang-tidy build in docker ```sh mkdir build_tidy -./docker/packager/packager --output-dir=./build_tidy --package-type=binary --compiler=clang-17 --debug-build --clang-tidy +./docker/packager/packager --output-dir=./build_tidy --package-type=binary --compiler=clang-18 --debug-build --clang-tidy ``` diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index bbc5fbeebcb..db3eabaecfc 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -1,27 +1,49 @@ --- slug: /en/development/contrib -sidebar_position: 72 +sidebar_position: 73 sidebar_label: Third-Party Libraries description: A list of third-party libraries used --- # Third-Party Libraries Used -ClickHouse utilizes third-party libraries for different purposes, e.g., to connect to other databases, to decode (encode) data during load (save) from (to) disk or to implement certain specialized SQL functions. To be independent of the available libraries in the target system, each third-party library is imported as a Git submodule into ClickHouse's source tree and compiled and linked with ClickHouse. A list of third-party libraries and their licenses can be obtained by the following query: +ClickHouse utilizes third-party libraries for different purposes, e.g., to connect to other databases, to decode/encode data during load/save from/to disk, or to implement certain specialized SQL functions. +To be independent of the available libraries in the target system, each third-party library is imported as a Git submodule into ClickHouse's source tree and compiled and linked with ClickHouse. +A list of third-party libraries and their licenses can be obtained by the following query: ``` sql SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` -Note that the listed libraries are the ones located in the `contrib/` directory of the ClickHouse repository. Depending on the build options, some of the libraries may have not been compiled, and as a result, their functionality may not be available at runtime. +Note that the listed libraries are the ones located in the `contrib/` directory of the ClickHouse repository. +Depending on the build options, some of the libraries may have not been compiled, and, as a result, their functionality may not be available at runtime. [Example](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) -## Adding new third-party libraries and maintaining patches in third-party libraries {#adding-third-party-libraries} +## Adding and maintaining third-party libraries -1. Each third-party library must reside in a dedicated directory under the `contrib/` directory of the ClickHouse repository. Avoid dumps/copies of external code, instead use Git submodule feature to pull third-party code from an external upstream repository. -2. Submodules are listed in `.gitmodule`. If the external library can be used as-is, you may reference the upstream repository directly. Otherwise, i.e. the external library requires patching/customization, create a fork of the official repository in the [ClickHouse organization in GitHub](https://github.com/ClickHouse). -3. In the latter case, create a branch with `clickhouse/` prefix from the branch you want to integrate, e.g. 
`clickhouse/master` (for `master`) or `clickhouse/release/vX.Y.Z` (for a `release/vX.Y.Z` tag). The purpose of this branch is to isolate customization of the library from upstream work. For example, pulls from the upstream repository into the fork will leave all `clickhouse/` branches unaffected. Submodules in `contrib/` must only track `clickhouse/` branches of forked third-party repositories. -4. To patch a fork of a third-party library, create a dedicated branch with `clickhouse/` prefix in the fork, e.g. `clickhouse/fix-some-desaster`. Finally, merge the patch branch into the custom tracking branch (e.g. `clickhouse/master` or `clickhouse/release/vX.Y.Z`) using a PR. -5. Always create patches of third-party libraries with the official repository in mind. Once a PR of a patch branch to the `clickhouse/` branch in the fork repository is done and the submodule version in ClickHouse official repository is bumped, consider opening another PR from the patch branch to the upstream library repository. This ensures, that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, 3) the change will not remain a maintenance burden solely on ClickHouse developers. -9. To update a submodule with changes in the upstream repository, first merge upstream `master` (or a new `versionX.Y.Z` tag) into the `clickhouse`-tracking branch in the fork repository. Conflicts with patches/customization will need to be resolved in this merge (see Step 4.). Once the merge is done, bump the submodule in ClickHouse to point to the new hash in the fork. +Each third-party library must reside in a dedicated directory under the `contrib/` directory of the ClickHouse repository. +Avoid dumping copies of external code into the library directory. +Instead create a Git submodule to pull third-party code from an external upstream repository. + +All submodules used by ClickHouse are listed in the `.gitmodule` file. +If the library can be used as-is (the default case), you can reference the upstream repository directly. +If the library needs patching, create a fork of the upstream repository in the [ClickHouse organization on GitHub](https://github.com/ClickHouse). + +In the latter case, we aim to isolate custom patches as much as possible from upstream commits. +To that end, create a branch with prefix `clickhouse/` from the branch or tag you want to integrate, e.g. `clickhouse/master` (for branch `master`) or `clickhouse/release/vX.Y.Z` (for tag `release/vX.Y.Z`). +This ensures that pulls from the upstream repository into the fork will leave custom `clickhouse/` branches unaffected. +Submodules in `contrib/` must only track `clickhouse/` branches of forked third-party repositories. + +Patches are only applied against `clickhouse/` branches of external libraries. +For that, push the patch as a branch with `clickhouse/`, e.g. `clickhouse/fix-some-desaster`. +Then create a PR from the new branch against the custom tracking branch with `clickhouse/` prefix, (e.g. `clickhouse/master` or `clickhouse/release/vX.Y.Z`) and merge the patch. + +Create patches of third-party libraries with the official repository in mind and consider contributing the patch back to the upstream repository. +This makes sure that others will also benefit from the patch and it will not be a maintenance burden for the ClickHouse team. 
+ +To pull upstream changes into the submodule, you can use two methods: +- (less work but less clean): merge upstream `master` into the corresponding `clickhouse/` tracking branch in the forked repository. You will need to resolve merge conflicts with previous custom patches. This method can be used when the `clickhouse/` branch tracks an upstream development branch like `master`, `main`, `dev`, etc. +- (more work but cleaner): create a new branch with `clickhouse/` prefix from the upstream commit or tag you like to integrate. Then re-apply all existing patches using new PRs (or squash them into a single PR). This method can be used when the `clickhouse/` branch tracks a specific upstream version branch or tag. It is cleaner in the sense that custom patches and upstream changes are better isolated from each other. + +Once the submodule has been updated, bump the submodule in ClickHouse to point to the new hash in the fork. diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 6623c46fa9f..ec5760541e8 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -72,7 +72,7 @@ You can also add original ClickHouse repo address to your local repository to pu After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`. :::note -Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md) and so on. +Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md), [on Linux for Linux/LoongArch](build-cross-loongarch.md) and so on. ::: ## Build System {#build-system} @@ -121,7 +121,7 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. -If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-17 CXX=clang++-17`. The clang version will be in the script output. +If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-18 CXX=clang++-18`. The clang version will be in the script output. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. 
diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 77a550f2a0e..0f097d27607 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -1,6 +1,6 @@ --- slug: /en/development/style -sidebar_position: 70 +sidebar_position: 71 sidebar_label: C++ Guide description: A list of recommendations regarding coding style, naming convention, formatting and more --- @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, …) and the ternary operator `?:`. +**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, ...) and the ternary operator `?:`. ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; If necessary, the operator can be wrapped to the next line. In this case, the offset in front of it is increased. -**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, …) from the argument. +**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, ...) from the argument. **12.** Put a space after a comma, but not before it. The same rule goes for a semicolon inside a `for` expression. @@ -115,7 +115,7 @@ public: **16.** If the same `namespace` is used for the entire file, and there isn’t anything else significant, an offset is not necessary inside `namespace`. -**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, … +**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, ... But if the inner `statement` contains curly brackets or `else`, the external block should be written in curly brackets. diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index efbce54d44b..bbc7dac0a2a 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -1,6 +1,6 @@ --- slug: /en/development/tests -sidebar_position: 71 +sidebar_position: 72 sidebar_label: Testing title: ClickHouse Testing description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index f32698f84f6..2b4d5fe04aa 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -51,6 +51,9 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo ### allows_query_when_mysql_lost `allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`). +### allow_startup_database_without_connection_to_mysql +`allow_startup_database_without_connection_to_mysql` — Allow to create and attach database without available connection to MySQL. Default: `0` (`false`). + ### materialized_mysql_tables_list `materialized_mysql_tables_list` — a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. 
Default value: empty list — means whole tables will be replicated.
diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
index 44febe78c77..1958250ed73 100644
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -17,6 +17,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
     ...
 ) ENGINE = EmbeddedRocksDB([ttl, rocksdb_dir, read_only]) PRIMARY KEY(primary_key_name)
+[ SETTINGS name=value, ... ]
 ```
 
 Engine parameters:
@@ -29,6 +30,11 @@ Engine parameters:
 - columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
 - queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`.
 
+Engine settings:
+
+- `optimize_for_bulk_insert` – Table is optimized for bulk insertions (the insert pipeline will create SST files and import them into the RocksDB database instead of writing to memtables); default value: `1`.
+- `bulk_insert_block_size` - Minimum size of SST files (in terms of rows) created by bulk insertion; default value: `1048449`.
+
 Example:
 
 ``` sql
diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md
index dbd1c270a4a..2749fa7e479 100644
--- a/docs/en/engines/table-engines/integrations/hdfs.md
+++ b/docs/en/engines/table-engines/integrations/hdfs.md
@@ -118,7 +118,7 @@ If the listing of files contains number ranges with leading zeros, use the const
 
 **Example**
 
-Create table with files named `file000`, `file001`, … , `file999`:
+Create table with files named `file000`, `file001`, ... , `file999`:
 
 ``` sql
 CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV')
diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md
index a4d0cf78066..a8315a5ad9e 100644
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -73,6 +73,7 @@ Optional parameters:
 - `rabbitmq_queue_consume` - Use user-defined queues and do not make any RabbitMQ setup: declaring exchanges, queues, bindings. Default: `false`.
 - `rabbitmq_username` - RabbitMQ username.
 - `rabbitmq_password` - RabbitMQ password.
+- `reject_unhandled_messages` - Reject messages (send RabbitMQ negative acknowledgement) in case of errors. This setting is automatically enabled if there is an `x-dead-letter-exchange` defined in `rabbitmq_queue_settings_list` (see the example after this list).
 - `rabbitmq_commit_on_select` - Commit messages when select query is made. Default: `false`.
 - `rabbitmq_max_rows_per_message` — The maximum number of rows written in one RabbitMQ message for row-based formats. Default : `1`.
 - `rabbitmq_empty_queue_backoff_start` — A start backoff point to reschedule read if the rabbitmq queue is empty.
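To illustrate how these optional parameters are supplied, including the new `reject_unhandled_messages` setting named in the list above, here is a minimal, hedged sketch; the connection details, exchange name, and column layout are illustrative assumptions, not values taken from the documentation.

```sql
-- Illustrative sketch only: explicitly enable negative acknowledgements on errors.
CREATE TABLE rabbitmq_events (key UInt64, value String)
ENGINE = RabbitMQ
SETTINGS rabbitmq_host_port = 'localhost:5672',
         rabbitmq_exchange_name = 'events-exchange',
         rabbitmq_format = 'JSONEachRow',
         reject_unhandled_messages = 1;
```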
diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index dfa06801d04..cb1da1c8e68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -178,7 +178,7 @@ If the listing of files contains number ranges with leading zeros, use the const **Example with wildcards 1** -Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Create table with files named `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 8ebab80423f..aa7fa512480 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -202,8 +202,7 @@ Example: CREATE TABLE s3queue_engine_table (name String, value UInt32) ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') SETTINGS - mode = 'unordered', - keeper_path = '/clickhouse/s3queue/'; + mode = 'unordered'; CREATE TABLE stats (name String, value UInt32) ENGINE = MergeTree() ORDER BY name; diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 78a27d3ff86..5a81313f62e 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -22,9 +22,8 @@ ORDER BY Distance(vectors, Point) LIMIT N ``` -`vectors` contains N-dimensional values of type [Array](../../../sql-reference/data-types/array.md) or -[Tuple](../../../sql-reference/data-types/tuple.md), for example embeddings. Function `Distance` computes the distance between two vectors. -Often, the Euclidean (L2) distance is chosen as distance function but [other +`vectors` contains N-dimensional values of type [Array(Float32)](../../../sql-reference/data-types/array.md), for example embeddings. +Function `Distance` computes the distance between two vectors. Often, the Euclidean (L2) distance is chosen as distance function but [other distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17, 0.33, ...)`, and `N` limits the number of search results. @@ -47,7 +46,7 @@ of the search space (using clustering, search trees, etc.) which allows to compu # Creating and Using ANN Indexes {#creating_using_ann_indexes} -Syntax to create an ANN index over an [Array](../../../sql-reference/data-types/array.md) column: +Syntax to create an ANN index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column: ```sql CREATE TABLE table_with_ann_index @@ -60,19 +59,6 @@ ENGINE = MergeTree ORDER BY id; ``` -Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: - -```sql -CREATE TABLE table_with_ann_index -( - `id` Int64, - `vectors` Tuple(Float32[, Float32[, ...]]), - INDEX [ann_index_name] vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]] -) -ENGINE = MergeTree -ORDER BY id; -``` - ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write requests. 
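For context, here is a minimal, hedged sketch of the end-to-end workflow described above, using the Annoy index type covered further down this page; the table name, sample vectors, and the enabling `SET` statement are illustrative assumptions.

```sql
-- Illustrative sketch; assumes the experimental Annoy ANN index is available and enabled.
SET allow_experimental_annoy_index = 1;

CREATE TABLE ann_demo
(
    id Int64,
    vectors Array(Float32),
    INDEX ann_idx vectors TYPE annoy('L2Distance')
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO ann_demo VALUES (1, [0.1, 0.2]), (2, [0.8, 0.9]), (3, [0.15, 0.22]);

-- ANN search: the N rows closest to the reference point (0.1, 0.2).
SELECT id
FROM ann_demo
ORDER BY L2Distance(vectors, [0.1, 0.2])
LIMIT 2;
```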
@@ -164,7 +150,7 @@ linear surfaces (lines in 2D, planes in 3D etc.). -Syntax to create an Annoy index over an [Array](../../../sql-reference/data-types/array.md) column: +Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column: ```sql CREATE TABLE table_with_annoy_index @@ -177,19 +163,6 @@ ENGINE = MergeTree ORDER BY id; ``` -Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: - -```sql -CREATE TABLE table_with_annoy_index -( - id Int64, - vectors Tuple(Float32[, Float32[, ...]]), - INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N] -) -ENGINE = MergeTree -ORDER BY id; -``` - Annoy currently supports two distance functions: - `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). @@ -203,10 +176,9 @@ Parameter `NumTrees` is the number of trees which the algorithm creates (default more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes. :::note -Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays must have same length. To avoid -errors, you can use a [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT -constraint_name_1 CHECK length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default -values) are not supported. +All arrays must have same length. To avoid errors, you can use a +[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK +length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported. ::: The creation of Annoy indexes (whenever a new part is build, e.g. at the end of a merge) is a relatively slow process. You can increase @@ -264,19 +236,6 @@ ENGINE = MergeTree ORDER BY id; ``` -Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: - -```sql -CREATE TABLE table_with_usearch_index -( - id Int64, - vectors Tuple(Float32[, Float32[, ...]]), - INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N] -) -ENGINE = MergeTree -ORDER BY id; -``` - USearch currently supports two distance functions: - `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 23d98d4b20e..eda87fd06c1 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -71,7 +71,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries. +The `partition` column contains the names of the partitions. 
There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md) queries. The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query. diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index 9374f6a3ac1..ec4c14b6bf1 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -2,7 +2,7 @@ slug: /en/engines/table-engines/mergetree-family/invertedindexes sidebar_label: Full-text Indexes description: Quickly find search terms in text. -keywords: [full-text search, text search, inverted, index, indices] +keywords: [full-text search, text search, index, indices] --- # Full-text Search using Full-text Indexes [experimental] @@ -37,7 +37,7 @@ ways, for example with respect to their DDL/DQL syntax or performance/compressio To use full-text indexes, first enable them in the configuration: ```sql -SET allow_experimental_inverted_index = true; +SET allow_experimental_full_text_index = true; ``` An full-text index can be defined on a string column using the following syntax @@ -53,6 +53,10 @@ ENGINE = MergeTree ORDER BY key ``` +:::note +In earlier versions of ClickHouse, the corresponding index type name was `inverted`. +::: + where `N` specifies the tokenizer: - `full_text(0)` (or shorter: `full_text()`) set the tokenizer to "tokens", i.e. split strings along spaces, diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 886c29e755e..689c05a24af 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -178,6 +178,10 @@ Additional parameters that control the behavior of the `MergeTree` (optional): `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting. +#### allow_experimental_optimized_row_order + +`allow_experimental_optimized_row_order` - Experimental. Enables the optimization of the row order during inserts to improve the compressability of the data for compression codecs (e.g. LZ4). Analyzes and reorders the data, and thus increases the CPU overhead of inserts. + **Example of Sections Setting** ``` sql @@ -494,7 +498,7 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran #### Special-purpose - Experimental indexes to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details. -- An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details. +- An experimental full-text index to support full-text search. See [here](invertedindexes.md) for details. ### Functions Support {#functions-support} @@ -502,31 +506,31 @@ Conditions in the `WHERE` clause contains calls of the functions that operate wi Indexes of type `set` can be utilized by all functions. 
The other index types are supported as follows: -| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | inverted | -|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|----------| -| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](/docs/en/sql-reference/functions/string-search-functions.md/#like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | -| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | -| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | -| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | -| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | -| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ | -| [in](/docs/en/sql-reference/functions/in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](/docs/en/sql-reference/functions/in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [empty](/docs/en/sql-reference/functions/array-functions/#empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [notEmpty](/docs/en/sql-reference/functions/array-functions/#notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | -| [has](/docs/en/sql-reference/functions/array-functions/#has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | -| [hasAny](/docs/en/sql-reference/functions/array-functions/#hasany) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ | -| [hasAll](/docs/en/sql-reference/functions/array-functions/#hasall) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | -| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | -| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | -| hasTokenCaseInsensitive (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ | -| hasTokenCaseInsensitiveOrNull (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ | +| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | full_text | +|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|-----------| +| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](/docs/en/sql-reference/functions/string-search-functions.md/#like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | +| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| 
[endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | +| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ | +| [in](/docs/en/sql-reference/functions/in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](/docs/en/sql-reference/functions/in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [empty](/docs/en/sql-reference/functions/array-functions/#empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [notEmpty](/docs/en/sql-reference/functions/array-functions/#notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [has](/docs/en/sql-reference/functions/array-functions/#has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | +| [hasAny](/docs/en/sql-reference/functions/array-functions/#hasany) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ | +| [hasAll](/docs/en/sql-reference/functions/array-functions/#hasall) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | +| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | +| hasTokenCaseInsensitive (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ | +| hasTokenCaseInsensitiveOrNull (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ | Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. @@ -954,7 +958,7 @@ In the case of `MergeTree` tables, data is getting to disk in different ways: - As a result of an insert (`INSERT` query). - During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - When downloading from another replica. -- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). +- As a result of partition freezing [ALTER TABLE ... FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy: @@ -966,7 +970,7 @@ Under the hood, mutations and partition freezing make use of [hard links](https: In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file. Data is never transferred from the last one and into the first one. One may use system tables [system.part_log](/docs/en/operations/system-tables/part_log.md/#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](/docs/en/operations/system-tables/parts.md/#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs. -User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. 
User will get an error message if not enough free space is available or if any of the required conditions are not met. +User can force moving a part or a partition from one volume to another using the query [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met. Moving data does not interfere with data replication. Therefore, different storage policies can be specified for the same table on different replicas. diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index a6258bcd581..5a0a2691a9e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -45,7 +45,7 @@ When merging, `ReplacingMergeTree` from all the rows with the same sorting key l - The last in the selection, if `ver` not set. A selection is a set of rows in a set of parts participating in the merge. The most recently created part (the last insert) will be the last one in the selection. Thus, after deduplication, the very last row from the most recent insert will remain for each unique sorting key. - With the maximum version, if `ver` specified. If `ver` is the same for several rows, then it will use "if `ver` is not specified" rule for them, i.e. the most recent inserted row will remain. -Example: +Example: ```sql -- without ver - the last inserted 'wins' @@ -90,14 +90,14 @@ SELECT * FROM mySecondReplacingMT FINAL; ### is_deleted -`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted“ row, `0` is a “state“ row. +`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a "deleted" row, `0` is a "state" row. Column data type — `UInt8`. :::note `is_deleted` can only be enabled when `ver` is used. -The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used. +The row is deleted only when `OPTIMIZE ... FINAL CLEANUP`. This `CLEANUP` special keyword is not allowed by default unless `allow_experimental_replacing_merge_with_cleanup` MergeTree setting is enabled. No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. @@ -114,21 +114,22 @@ CREATE OR REPLACE TABLE myThirdReplacingMT `is_deleted` UInt8 ) ENGINE = ReplacingMergeTree(eventTime, is_deleted) -ORDER BY key; +ORDER BY key +SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0); -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); select * from myThirdReplacingMT final; 0 rows in set. Elapsed: 0.003 sec. 
-- delete rows with is_deleted -OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; +OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); -select * from myThirdReplacingMT final; +select * from myThirdReplacingMT final; ┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ │ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 822e2bc385f..65a5f58b166 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -10,7 +10,7 @@ sidebar_label: Data Replication In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace: ```sql -ENGINE = ReplicatedReplacingMergeTree( +ENGINE = ReplicatedMergeTree( '/clickhouse/tables/{shard}/table_name', '{replica}', ver @@ -20,7 +20,7 @@ ENGINE = ReplicatedReplacingMergeTree( with: ```sql -ENGINE = ReplicatedReplacingMergeTree +ENGINE = ReplicatedMergeTree ``` ::: @@ -140,11 +140,11 @@ The system monitors data synchronicity on replicas and is able to recover after :::note In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace: ``` -ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver) ``` with: ``` -ENGINE = ReplicatedReplacingMergeTree +ENGINE = ReplicatedMergeTree ``` ::: @@ -177,7 +177,7 @@ CREATE TABLE table_name CounterID UInt32, UserID UInt32, ver UInt16 -) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver) +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver) PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID); diff --git a/docs/en/engines/table-engines/special/external-data.md b/docs/en/engines/table-engines/special/external-data.md index 7ea3f3e30d6..f6d6dae7eb6 100644 --- a/docs/en/engines/table-engines/special/external-data.md +++ b/docs/en/engines/table-engines/special/external-data.md @@ -29,7 +29,7 @@ Only a single table can be retrieved from stdin. The following parameters are optional: **–name**– Name of the table. If omitted, _data is used. **–format** – Data format in the file. If omitted, TabSeparated is used. -One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, … +One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, ... **–structure**– The table structure in the format`UserID UInt64`, `URL String`. Defines the column names and types. The files specified in ‘file’ will be parsed by the format specified in ‘format’, using the data types specified in ‘types’ or ‘structure’. The table will be uploaded to the server and accessible there as a temporary table with the name in ‘name’. 
diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index fdf5242ba3b..0d422f64762 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -14,6 +14,10 @@ Usage scenarios: - Convert data from one format to another. - Updating data in ClickHouse via editing a file on a disk. +:::note +This engine is not currently available in ClickHouse Cloud, please [use the S3 table function instead](/docs/en/sql-reference/table-functions/s3.md). +::: + ## Usage in ClickHouse Server {#usage-in-clickhouse-server} ``` sql diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 6525c29306a..67752f223ce 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -111,29 +111,10 @@ clickhouse-client # or "clickhouse-client --password" if you've set up a passwor ```
-Deprecated Method for installing deb-packages - -``` bash -sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 - -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ - /etc/apt/sources.list.d/clickhouse.list -sudo apt-get update - -sudo apt-get install -y clickhouse-server clickhouse-client - -sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. -``` - -
- -
-Migration Method for installing the deb-packages +Old distributions method for installing the deb-packages ```bash -sudo apt-key del E0C56BD4 +sudo apt-get install apt-transport-https ca-certificates dirmngr sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list @@ -240,22 +221,6 @@ sudo systemctl start clickhouse-keeper sudo systemctl status clickhouse-keeper ``` -
- -Deprecated Method for installing rpm-packages - -``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client - -sudo /etc/init.d/clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. -``` - -
- You can replace `stable` with `lts` to use different [release kinds](/knowledgebase/production) based on your needs. Then run these commands to install packages: @@ -308,33 +273,6 @@ tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \ sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` -
- -Deprecated Method for installing tgz archives - -``` bash -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ - grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh -``` -
- For production environments, it’s recommended to use the latest `stable`-version. You can find its number on GitHub page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`. ### From Docker Image {#from-docker-image} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 03cf345349e..66d5bd2e574 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -75,7 +75,7 @@ The supported formats are: | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | | [ORC](#data-format-orc) | ✔ | ✔ | | [One](#data-format-one) | ✔ | ✗ | -| [Npy](#data-format-npy) | ✔ | ✗ | +| [Npy](#data-format-npy) | ✔ | ✔ | | [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | @@ -91,6 +91,7 @@ The supported formats are: | [MySQLDump](#mysqldump) | ✔ | ✗ | | [DWARF](#dwarf) | ✔ | ✗ | | [Markdown](#markdown) | ✗ | ✔ | +| [Form](#form) | ✔ | ✗ | You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section. @@ -196,6 +197,7 @@ SELECT * FROM nestedt FORMAT TSV - [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. - [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. - [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV input format will be `\r\n` instead of `\n`. Default value - `false`. - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. @@ -206,7 +208,7 @@ SELECT * FROM nestedt FORMAT TSV Differs from `TabSeparated` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRaw`. +This format is also available under the names `TSVRaw`, `Raw`. ## TabSeparatedWithNames {#tabseparatedwithnames} @@ -241,14 +243,14 @@ This format is also available under the name `TSVWithNamesAndTypes`. Differs from `TabSeparatedWithNames` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRawWithNames`. 
+This format is also available under the names `TSVRawWithNames`, `RawWithNames`. ## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes} Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRawWithNamesAndNames`. +This format is also available under the names `TSVRawWithNamesAndNames`, `RawWithNamesAndNames`. ## Template {#format-template} @@ -2465,23 +2467,22 @@ Result: ## Npy {#data-format-npy} -This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse: -| Npy type | ClickHouse type | -|:--------:|:---------------:| -| b1 | UInt8 | -| i1 | Int8 | -| i2 | Int16 | -| i4 | Int32 | -| i8 | Int64 | -| u1 | UInt8 | -| u2 | UInt16 | -| u4 | UInt32 | -| u8 | UInt64 | -| f2 | Float32 | -| f4 | Float32 | -| f8 | Float64 | -| S | String | -| U | String | +This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse: + +| Npy data type (`INSERT`) | ClickHouse data type | Npy data type (`SELECT`) | +|--------------------------|-----------------------------------------------------------------|--------------------------| +| `i1` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `i1` | +| `i2` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `i2` | +| `i4` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `i4` | +| `i8` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `i8` | +| `u1`, `b1` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `u1` | +| `u2` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `u2` | +| `u4` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `u4` | +| `u8` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `u8` | +| `f2`, `f4` | [Float32](/docs/en/sql-reference/data-types/float.md) | `f4` | +| `f8` | [Float64](/docs/en/sql-reference/data-types/float.md) | `f8` | +| `S`, `U` | [String](/docs/en/sql-reference/data-types/string.md) | `S` | +| | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `S` | **Example of saving an array in .npy format using Python** @@ -2508,6 +2509,14 @@ Result: └───────────────┘ ``` +**Selecting Data** + +You can select data from a ClickHouse table and save them into some file in the Npy format by the following command: + +```bash +$ clickhouse-client --query="SELECT {column} FROM {some_table} FORMAT Npy" > {filename.npy} +``` + ## LineAsString {#lineasstring} In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. 
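As a hedged illustration of the `LineAsString` behaviour described above (the table and sample values are made up for this sketch):

```sql
-- Illustrative sketch: each input line becomes one String value, taken verbatim.
CREATE TABLE line_as_string (field String) ENGINE = Memory;

INSERT INTO line_as_string FORMAT LineAsString "I love apple", "banana", "orange";

SELECT * FROM line_as_string;
-- Expected: a single row containing the whole line:
-- "I love apple", "banana", "orange"
```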
@@ -2843,3 +2852,31 @@ FORMAT Markdown ``` Markdown table will be generated automatically and can be used on markdown-enabled platforms, like Github. This format is used only for output. + +## Form {#form} + +The Form format can be used to read or write a single record in the application/x-www-form-urlencoded format in which data is formatted `key1=value1&key2=value2` + +Examples: + +Given a file `data.tmp` placed in the `user_files` path with some URL encoded data: + +```text +t_page=116&c.e=ls7xfkpm&c.tti.m=raf&rt.start=navigation&rt.bmr=390%2C11%2C10 +``` + +```sql +SELECT * FROM file(data.tmp, Form) FORMAT vertical; +``` + +Result: + +```text +Row 1: +────── +t_page: 116 +c.e: ls7xfkpm +c.tti.m: raf +rt.start: navigation +rt.bmr: 390,11,10 +``` diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index be2d028e87f..5b7615485ca 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -8,6 +8,17 @@ sidebar_label: Visual Interfaces ## Open-Source {#open-source} +### ch-ui {#ch-ui} + +[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases, designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions. + +Features: + +- ClickHouse Integration: Easily manage connections and execute queries. +- Responsive Tab Management: Dynamically handle multiple tabs, such as query and table tabs. +- Performance Optimizations: Utilizes Indexed DB for efficient caching and state management. +- Local Data Storage: All data is stored locally in the browser, ensuring no data is sent anywhere else. + ### Tabix {#tabix} Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) project. @@ -210,6 +221,18 @@ Features: - Pre-built metrics dashboards. - Multiple users/projects via YAML config. +### clickhouse-monitoring {#clickhouse-monitoring} + +[clickhouse-monitoring](https://github.com/duyet/clickhouse-monitoring) is a simple Next.js dashboard that relies on `system.*` tables to help monitor and provide an overview of your ClickHouse cluster. + +Features: + +- Query monitor: current queries, query history, query resources (memory, parts read, file_open, ...), most expensive queries, most used tables or columns, etc. +- Cluster monitor: total memory/CPU usage, distributed queue, global settings, mergetree settings, metrics, etc. +- Tables and parts information: size, row count, compression, part size, etc., at the column level detail. +- Useful tools: Zookeeper data exploration, query EXPLAIN, kill queries, etc. +- Visualization metric charts: queries and resource usage, number of merges/mutation, merge performance, query performance, etc. + ## Commercial {#commercial} ### DataGrip {#datagrip} diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 2ba50b39934..46c24ad8491 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -22,7 +22,7 @@ description: In order to effectively mitigate possible human errors, you should TEMPORARY TABLE table_name [AS table_name_in_backup] | VIEW view_name [AS view_name_in_backup] ALL TEMPORARY TABLES [EXCEPT ...] | - ALL DATABASES [EXCEPT ...] } [,...] + ALL [EXCEPT ...] } [,...] [ON CLUSTER 'cluster_name'] TO|FROM File('/') | Disk('', '/') | S3('/', '', '') [SETTINGS base_backup = File('/') | Disk(...) 
| S3('/', '', '')] diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 089704705d0..57fea3cca3a 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -7,6 +7,8 @@ sidebar_label: Configuration Files # Configuration Files The ClickHouse server can be configured with configuration files in XML or YAML syntax. In most installation types, the ClickHouse server runs with `/etc/clickhouse-server/config.xml` as default configuration file, but it is also possible to specify the location of the configuration file manually at server startup using command line option `--config-file=` or `-C`. Additional configuration files may be placed into directory `config.d/` relative to the main configuration file, for example into directory `/etc/clickhouse-server/config.d/`. Files in this directory and the main configuration are merged in a preprocessing step before the configuration is applied in ClickHouse server. Configuration files are merged in alphabetical order. To simplify updates and improve modularization, it is best practice to keep the default `config.xml` file unmodified and place additional customization into `config.d/`. +(The ClickHouse keeper configuration lives in `/etc/clickhouse-keeper/keeper_config.xml` and thus the additional files need to be placed in `/etc/clickhouse-keeper/keeper_config.d/` ) + It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `...` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent. @@ -67,6 +69,8 @@ generates merged configuration file: ``` +### Using from_env and from_zk + To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`. Example with `$MAX_QUERY_SIZE = 150000`: @@ -93,6 +97,59 @@ which is equal to ``` +The same is possible using `from_zk`: + +``` xml + + + +``` + +``` +# clickhouse-keeper-client +/ :) touch /zk_configs +/ :) create /zk_configs/postgresql_port "9005" +/ :) get /zk_configs/postgresql_port +9005 +``` + +which is equal to + + +``` xml + + 9005 + +``` + +#### Default values for from_env and from_zk attributes + +It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"`. + +With previous example, but `MAX_QUERY_SIZE` is unset: + +``` xml + + + + 150000 + + + +``` + +will take the default value + +``` xml + + + + 150000 + + + +``` + ## Substituting Configuration {#substitution} The config can define substitutions. There are two types of substitutions: diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 28831404a1f..a5fe74fd0c6 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -561,6 +561,25 @@ Default value: 5000 400 ``` +## max\_view\_num\_to\_warn {#max-view-num-to-warn} +If the number of attached views exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. 
+Default value: 10000
+
+**Example**
+
+``` xml
+<max_view_num_to_warn>400</max_view_num_to_warn>
+```
+
+## max\_dictionary\_num\_to\_warn {#max-dictionary-num-to-warn}
+If the number of attached dictionaries exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
+Default value: 1000
+
+**Example**
+
+``` xml
+<max_dictionary_num_to_warn>400</max_dictionary_num_to_warn>
+```
 ## max\_part\_num\_to\_warn {#max-part-num-to-warn}
 If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md
index 76250b80476..c3f303dcd38 100644
--- a/docs/en/operations/settings/merge-tree-settings.md
+++ b/docs/en/operations/settings/merge-tree-settings.md
@@ -885,3 +885,47 @@ Default value: false
 
 **See Also**
 
 - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
+
+### allow_experimental_optimized_row_order
+
+Controls whether the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.
+
+MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec).
+Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns.
+Long runs of the same value typically compress very well.
+
+If this setting is enabled, ClickHouse attempts to store the data in newly inserted parts in a row order that minimizes the number of equal-value runs across the columns of the new table part.
+In other words, a small number of equal-value runs means that individual runs are long and compress well.
+
+Finding the optimal row order is computationally infeasible (NP-hard).
+Therefore, ClickHouse uses a heuristic to quickly find a row order which still improves compression rates over the original row order.
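To show how this would be turned on, a hedged sketch follows; the table layout is illustrative, and it assumes the setting can be applied per table like other MergeTree settings.

```sql
-- Illustrative sketch: a low-cardinality primary key plus the experimental
-- row-order optimization for better compression of newly inserted parts.
CREATE TABLE events
(
    event_type LowCardinality(String),
    user_id    UInt64,
    payload    String
)
ENGINE = MergeTree
ORDER BY event_type
SETTINGS allow_experimental_optimized_row_order = 1;
```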
+ +Heuristics for finding a row order + +It is generally possible to shuffle the rows of a table (or table part) freely as SQL considers the same table (table part) in different row order equivalent. + +This freedom of shuffling rows is restricted when a primary key is defined for the table. +In ClickHouse, a primary key `C1, C2, ..., CN` enforces that the table rows are sorted by columns `C1`, `C2`, ... `Cn` ([clustered index](https://en.wikipedia.org/wiki/Database_index#Clustered)). +As a result, rows can only be shuffled within "equivalence classes" of row, i.e. rows which have the same values in their primary key columns. +The intuition is that primary keys with high-cardinality, e.g. primary keys involving a `DateTime64` timestamp column, lead to many small equivalence classes. +Likewise, tables with a low-cardinality primary key, create few and large equivalence classes. +A table with no primary key represents the extreme case of a single equivalence class which spans all rows. + +The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows. + +The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemir, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns. +It performs three steps: +1. Find all equivalence classes based on the row values in primary key columns. +2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns. +3. For each equivalence class, sort the rows in order of ascending non-primary-key column cardinality. + +
+ +If enabled, insert operations incur additional CPU costs to analyze and optimize the row order of the new data. +INSERTs are expected to take 30-50% longer depending on the data characteristics. +Compression rates of LZ4 or ZSTD improve on average by 20-40%. + +This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values. +High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index d86f18ff982..2a20e74e20f 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -303,7 +303,7 @@ What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘ Limits the number of rows in the hash table that is used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. @@ -320,7 +320,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This setting applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6666f68c177..1a27b350652 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -831,7 +831,13 @@ Default value: `0`. ### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line} -Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). +Use DOS/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). + +Disabled by default. + +### input_format_tsv_crlf_end_of_line {#input_format_tsv_crlf_end_of_line} + +Use DOS/Windows-style line separator (CRLF) for TSV input files instead of Unix style (LF). Disabled by default. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4f158a4fd6e..0b905df21d4 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1925,7 +1925,9 @@ Default value: `16`. ### wait_for_async_insert {#wait-for-async-insert} -Enables or disables waiting for processing of asynchronous insertion. If enabled, server will return `OK` only after the data is inserted. Otherwise, it will return `OK` even if the data wasn't inserted. +Enables or disables waiting for processing of asynchronous insertion. If enabled, server will return `OK` only after the data is inserted. 
Otherwise, it will return `OK` as soon it has received the data, but it might still fail to parse or insert it later (You can check in system.asynchronous_insert_log) + +If you want to use asynchronous inserts, we need to also enable [`async_insert`](#async-insert). Possible values: @@ -1954,7 +1956,7 @@ Possible values: - Positive integer. - 0 — Asynchronous insertions are disabled. -Default value: `1000000`. +Default value: `10485760`. ### async_insert_max_query_number {#async-insert-max-query-number} @@ -2246,7 +2248,7 @@ Default value: 0. ## count_distinct_implementation {#count_distinct_implementation} -Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. +Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. Possible values: @@ -3663,6 +3665,26 @@ Possible values: Default value: `0`. +## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} + +Ignore absence of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + +## s3_validate_request_settings {#s3_validate_request_settings} + +Enables s3 request settings validation. + +Possible values: +- 1 — validate settings. +- 0 — do not validate settings. + +Default value: `1`. + ## hdfs_truncate_on_insert {#hdfs_truncate_on_insert} Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. @@ -3695,6 +3717,56 @@ Possible values: Default value: `0`. +## hdfs_throw_on_zero_files_match {#hdfs_throw_on_zero_files_match} + +Throw an error if matched zero files according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. +- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist} + +Ignore absence of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + +## azure_throw_on_zero_files_match {#azure_throw_on_zero_files_match} + +Throw an error if matched zero files according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. +- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist} + +Ignore absence of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + +## azure_skip_empty_files {#azure_skip_empty_files} + +Enables or disables skipping empty files in S3 engine. + +Possible values: +- 0 — `SELECT` throws an exception if empty file is not compatible with requested format. +- 1 — `SELECT` returns empty result for empty file. + +Default value: `0`. + ## engine_url_skip_empty_files {#engine_url_skip_empty_files} Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables. @@ -5466,3 +5538,15 @@ Defines how MySQL types are converted to corresponding ClickHouse types. 
A comma - `datetime64`: convert `DATETIME` and `TIMESTAMP` types to `DateTime64` instead of `DateTime` when precision is not `0`. - `date2Date32`: convert `DATE` to `Date32` instead of `Date`. Takes precedence over `date2String`. - `date2String`: convert `DATE` to `String` instead of `Date`. Overridden by `datetime64`. + +## cross_join_min_rows_to_compress + +Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. + +Default value: `10000000`. + +## cross_join_min_bytes_to_compress + +Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. + +Default value: `1GiB`. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 389c917d427..53ecd66396d 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -7,27 +7,27 @@ title: "External Disks for Storing Data" Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported: 1. [Amazon S3](https://aws.amazon.com/s3/) object storage. -2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) -3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). +2. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). +3. Unsupported: The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) :::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. 1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine. -2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. -3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. +2. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. +3. Unsupported: to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. ::: ## Configuring external storage {#configuring-external-storage} -[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. 
+[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` (unsupported) using a disk with types `s3`, `azure_blob_storage`, `hdfs` (unsupported) accordingly. Disk configuration requires: -1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. +1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs` (unsupported), `local_blob_storage`, `web`. 2. Configuration of a specific external storage type. Starting from 24.1 clickhouse version, it is possible to use a new configuration option. It requires to specify: 1. `type` equal to `object_storage` -2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`. +2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs` (unsupported), `local_blob_storage` (or just `local` from `24.3`), `web`. Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web` and, starting from `24.4`, `plain_rewritable`. Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them). @@ -328,7 +328,7 @@ Configuration: ``` -Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type. +Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs` (unsupported), `local`) using `plain` metadata type. Configuration: ``` xml @@ -371,6 +371,8 @@ is equal to ``` +Starting from `24.5` it is possible configure any object storage disk (`s3`, `azure`, `local`) using `plain_rewritable` metadata type. + ### Using Azure Blob Storage {#azure-blob-storage} `MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. @@ -419,6 +421,7 @@ Other parameters: * `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. * `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). * `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +* `metadata_keep_free_space_bytes` - the amount of free metadata disk space to be reserved. Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). 
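For illustration, the same kind of object storage disk can also be declared inline in a `CREATE TABLE` statement instead of the server configuration. The sketch below assumes an S3-compatible endpoint; the bucket URL and credentials are placeholders, and the exact parameter set depends on the chosen disk type.

```sql
-- Minimal sketch: attach a MergeTree table to an inline-defined object storage disk.
-- The endpoint and credentials are placeholders, not working values.
CREATE TABLE table_on_s3
(
    id UInt64,
    value String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS
    disk = disk(
        type = 's3',
        endpoint = 'https://s3.eu-west-1.amazonaws.com/my-bucket/tables/',
        access_key_id = 'REPLACE_WITH_KEY',
        secret_access_key = 'REPLACE_WITH_SECRET');
```

When the disk is declared in the server configuration instead, only the disk or storage policy name needs to be referenced from the table settings.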
@@ -426,12 +429,14 @@ Examples of working configurations can be found in integration tests directory ( Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: -## Using HDFS storage {#hdfs-storage} +## Using HDFS storage (Unsupported) In this sample configuration: -- the disk is of type `hdfs` +- the disk is of type `hdfs` (unsupported) - the data is hosted at `hdfs://hdfs1:9000/clickhouse/` +By the way, HDFS is unsupported and therefore there might be issues when using it. Feel free to make a pull request with the fix if any issue arises. + ```xml @@ -462,9 +467,11 @@ In this sample configuration: ``` +Keep in mind that HDFS may not work in corner cases. + ### Using Data Encryption {#encrypted-virtual-file-system} -You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. +You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) (unsupported) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. Example of disk configuration: @@ -527,7 +534,7 @@ Example of disk configuration: It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. -For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS. +For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS (unsupported). Cache uses `LRU` cache policy. @@ -969,7 +976,7 @@ Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#htt ### Zero-copy Replication (not ready for production) {#zero-copy} -Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. +Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` (unsupported) disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. 
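As a quick sanity check after configuring any of the disks above, the system tables can be queried to confirm that the server picked up the disks and storage policies. A minimal sketch (the exact column set may vary slightly between versions):

```sql
-- List all disks known to the server, including external object storage disks.
SELECT name, path, free_space, total_space
FROM system.disks;

-- Show how storage policies and volumes map to those disks.
SELECT policy_name, volume_name, disks
FROM system.storage_policies;
```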
diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index af582646653..c0caea1ce5e 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -14,7 +14,7 @@ The `system.part_log` table contains the following columns: - `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - `NewPart` — Inserting of a new data part. - `MergeParts` — Merging of data parts. - - `DownloadParts` — Downloading a data part. + - `DownloadPart` — Downloading a data part. - `RemovePart` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - `MutatePart` — Mutating of a data part. - `MovePart` — Moving the data part from the one disk to another one. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index d48eb31df00..75b855966a3 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -108,7 +108,7 @@ Columns: - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution. - `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution. - `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution. -- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. +- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. For dictionaries configured using an XML file this is the name of the dictionary, and for dictionaries created by an SQL statement, the canonical name is the fully qualified object name. - `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution. - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md index 417c3460a53..01ffb76e3f2 100644 --- a/docs/en/operations/system-tables/server_settings.md +++ b/docs/en/operations/system-tables/server_settings.md @@ -32,20 +32,21 @@ WHERE name LIKE '%thread_pool%' ``` ``` text -┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐ -│ max_thread_pool_size │ 10000 │ 10000 │ 0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. 
│ UInt64 │ No │ 0 │ -│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ No │ 0 │ -│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ No │ 0 │ -│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ No │ 0 │ -│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ No │ 0 │ -│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ No │ 0 │ -│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ No │ 0 │ -│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ No │ 0 │ -│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ No │ 0 │ -│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ No │ 0 │ -│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ No │ 0 │ -│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ No │ 0 │ -└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘ +┌─name──────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐ +│ max_thread_pool_size │ 10000 │ 10000 │ 0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ No │ 0 │ +│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ No │ 0 │ +│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ No │ 0 │ +│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ No │ 0 │ +│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ No │ 0 │ +│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ No │ 0 │ +│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ No │ 0 │ +│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ No │ 0 │ +│ max_unexpected_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Unexpected ones) at startup. 
│ UInt64 │ No │ 0 │ +│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ No │ 0 │ +│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ No │ 0 │ +│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ No │ 0 │ +│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ No │ 0 │ +└───────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘ ``` diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index 4588f68cacd..6407c66783b 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -35,7 +35,7 @@ api_version /keeper/api_version :) ls /keeper/api_version :) cd xyz -Path /keeper/api_version/xyz does not exists +Path /keeper/api_version/xyz does not exist /keeper/api_version :) cd ../../ / :) ls keeper foo bar diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 8981ac1f752..1dc89b8dcf9 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM In this case, you should remember that you do not know the histogram bin borders. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) Checks whether the sequence contains an event chain that matches the pattern. @@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) +## sequenceCount(pattern)(time, cond1, cond2, ...) Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md new file mode 100644 index 00000000000..d9b44b3ff07 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md @@ -0,0 +1,45 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/analysis_of_variance +sidebar_position: 6 +--- + +# analysisOfVariance + +Provides a statistical test for one-way analysis of variance (ANOVA test). It is a test over several groups of normally distributed observations to find out whether all groups have the same mean or not. + +**Syntax** + +```sql +analysisOfVariance(val, group_no) +``` + +Aliases: `anova` + +**Parameters** +- `val`: value. +- `group_no` : group number that `val` belongs to. + +:::note +Groups are enumerated starting from 0 and there should be at least two groups to perform a test. +There should be at least one group with the number of observations greater than one. +::: + +**Returned value** + +- `(f_statistic, p_value)`. 
[Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). + +**Example** + +Query: + +```sql +SELECT analysisOfVariance(number, number % 2) FROM numbers(1048575); +``` + +Result: + +```response +┌─analysisOfVariance(number, modulo(number, 2))─┐ +│ (0,1) │ +└───────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index 4631060f33f..f1b5a6683e5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -7,15 +7,50 @@ sidebar_position: 6 Selects the first encountered value of a column. -By default, it ignores NULL values and returns the first NOT NULL value found in the column. As [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) if supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not. +**Syntax** +```sql +any(column) +``` + +Aliases: `any_value`, [`first_value`](../reference/first_value.md). + +**Parameters** +- `column`: The column name. + +**Returned value** + +By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) it supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not. + +:::note The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour. +::: +:::warning The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. -To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’. +To get a determinate result, you can use the [`min`](../reference/min.md) or [`max`](../reference/max.md) function instead of `any`. +::: -In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY. +**Implementation details** + +In some cases, you can rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`. When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. -- Alias: `any_value`, `first_value`. 
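To make the MySQL-like behaviour described above concrete, here is a small sketch (the table and column names are hypothetical): non-key columns that are functionally dependent on the grouping key can be wrapped in `any` so that they may appear in the `SELECT` list.

```sql
-- Hypothetical table with one row per visit; user_name is determined by user_id.
SELECT
    user_id,
    any(user_name) AS user_name,  -- not a GROUP BY key, so it must be aggregated
    count() AS visits
FROM page_visits
GROUP BY user_id;
```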
+**Example** + +Query: + +```sql +CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log; + +INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL); + +SELECT any(city) FROM any_nulls; +``` + +```response +┌─any(city)─┐ +│ Amsterdam │ +└───────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md new file mode 100644 index 00000000000..99104a9b8c7 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md @@ -0,0 +1,44 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/any_respect_nulls +sidebar_position: 103 +--- + +# any_respect_nulls + +Selects the first encountered value of a column, regardless of whether it is a `NULL` value or not. + +Aliases: `any_value_respect_nulls`, `first_value_respect_nulls`. + +**Syntax** + +```sql +any_respect_nulls(column) +``` + +**Parameters** +- `column`: The column name. + +**Returned value** + +- The first value encountered, regardless of whether it is a `NULL` value or not. + +**Example** + +Query: + +```sql +CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log; + +INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL); + +SELECT any(city), any_respect_nulls(city) FROM any_nulls; +``` + +```response +┌─any(city)─┬─any_respect_nulls(city)─┐ +│ Amsterdam │ ᴺᵁᴸᴸ │ +└───────────┴─────────────────────────┘ +``` + +**See Also** +- [any](../reference/any.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index 351c9fd8e2f..8fcee2cf8e6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -5,5 +5,35 @@ sidebar_position: 104 # anyLast -Selects the last value encountered. -The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. +Selects the last value encountered. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. + +**Syntax** + +```sql +anyLast(column) +``` + +**Parameters** +- `column`: The column name. + +**Returned value** + +- The last value encountered. + +**Example** + +Query: + +```sql +CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log; + +INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL); + +SELECT anyLast(city) FROM any_last_nulls; +``` + +```response +┌─anyLast(city)─┐ +│ Valencia │ +└───────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md new file mode 100644 index 00000000000..b6d0806f35d --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md @@ -0,0 +1,39 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls +sidebar_position: 104 +--- + +# anyLast_respect_nulls + +Selects the last value encountered, regardless of whether it is `NULL` or not. + +**Syntax** + +```sql +anyLast_respect_nulls(column) +``` + +**Parameters** +- `column`: The column name. + +**Returned value** + +- The last value encountered, regardless of whether it is `NULL` or not.
+ +**Example** + +Query: + +```sql +CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log; + +INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL); + +SELECT anyLast(city), anyLast_respect_nulls(city) FROM any_last_nulls; +``` + +```response +┌─anyLast(city)─┬─anyLast_respect_nulls(city)─┐ +│ Valencia │ ᴺᵁᴸᴸ │ +└───────────────┴─────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/corr.md b/docs/en/sql-reference/aggregate-functions/reference/corr.md index 8fa493c9630..5681c942169 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corr.md @@ -5,10 +5,57 @@ sidebar_position: 107 # corr -Syntax: `corr(x, y)` +Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient): + +$$ +\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}} +$$ -Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`. :::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error. -::: \ No newline at end of file +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`corrStable`](../reference/corrstable.md) function. It is slower but provides a more accurate result. +::: + +**Syntax** + +```sql +corr(x, y) +``` + +**Arguments** + +- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). +- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- The Pearson correlation coefficient. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS series; +CREATE TABLE series +( + i UInt32, + x_value Float64, + y_value Float64 +) +ENGINE = Memory; +INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3); +``` + +```sql +SELECT corr(x_value, y_value) +FROM series; +``` + +Result: + +```response +┌─corr(x_value, y_value)─┐ +│ 0.1730265755453256 │ +└────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md new file mode 100644 index 00000000000..718477b28dd --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md @@ -0,0 +1,55 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/corrmatrix +sidebar_position: 108 +--- + +# corrMatrix + +Computes the correlation matrix over N variables. + +**Syntax** + +```sql +corrMatrix(x[, ...]) +``` + +**Arguments** + +- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned value** + +- Correlation matrix. 
[Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test; +CREATE TABLE test +( + a UInt32, + b Float64, + c Float64, + d Float64 +) +ENGINE = Memory; +INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762); +``` + +```sql +SELECT arrayMap(x -> round(x, 3), arrayJoin(corrMatrix(a, b, c, d))) AS corrMatrix +FROM test; +``` + +Result: + +```response + ┌─corrMatrix─────────────┐ +1. │ [1,-0.096,0.243,0.746] │ +2. │ [-0.096,1,0.173,0.106] │ +3. │ [0.243,0.173,1,0.258] │ +4. │ [0.746,0.106,0.258,1] │ + └────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/corrstable.md b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md new file mode 100644 index 00000000000..b35442a32b6 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md @@ -0,0 +1,58 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/corrstable +sidebar_position: 107 +--- + +# corrStable + +Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient): + +$$ +\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}} +$$ + +Similar to the [`corr`](../reference/corr.md) function, but uses a numerically stable algorithm. As a result, `corrStable` is slower than `corr` but produces a more accurate result. + +**Syntax** + +```sql +corrStable(x, y) +``` + +**Arguments** + +- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). +- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- The Pearson correlation coefficient. [Float64](../../data-types/float.md). + +***Example** + +Query: + +```sql +DROP TABLE IF EXISTS series; +CREATE TABLE series +( + i UInt32, + x_value Float64, + y_value Float64 +) +ENGINE = Memory; +INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3); +``` + +```sql +SELECT corrStable(x_value, y_value) +FROM series; +``` + +Result: + +```response +┌─corrStable(x_value, y_value)─┐ +│ 0.17302657554532558 │ +└──────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md index 579035b2fe1..78b9f4cffea 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md @@ -1,14 +1,54 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpop -sidebar_position: 36 +sidebar_position: 37 --- # covarPop -Syntax: `covarPop(x, y)` +Calculates the population covariance: -Calculates the value of `Σ((x - x̅)(y - y̅)) / n`. +$$ +\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n} +$$ :::note -This function uses a numerically unstable algorithm. 
If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error. -::: \ No newline at end of file +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarPopStable`](../reference/covarpopstable.md) function. It works slower but provides a lower computational error. +::: + +**Syntax** + +```sql +covarPop(x, y) +``` + +**Arguments** + +- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). +- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- The population covariance between `x` and `y`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS series; +CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory; +INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3); +``` + +```sql +SELECT covarPop(x_value, y_value) +FROM series; +``` + +Result: + +```reference +┌─covarPop(x_value, y_value)─┐ +│ 6.485648 │ +└────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md new file mode 100644 index 00000000000..d7400599a49 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md @@ -0,0 +1,55 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/covarpopmatrix +sidebar_position: 36 +--- + +# covarPopMatrix + +Returns the population covariance matrix over N variables. + +**Syntax** + +```sql +covarPopMatrix(x[, ...]) +``` + +**Arguments** + +- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- Population covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test; +CREATE TABLE test +( + a UInt32, + b Float64, + c Float64, + d Float64 +) +ENGINE = Memory; +INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762); +``` + +```sql +SELECT arrayMap(x -> round(x, 3), arrayJoin(covarPopMatrix(a, b, c, d))) AS covarPopMatrix +FROM test; +``` + +Result: + +```reference + ┌─covarPopMatrix────────────┐ +1. │ [8.25,-1.76,4.08,6.748] │ +2. │ [-1.76,41.07,6.486,2.132] │ +3. │ [4.08,6.486,34.21,4.755] │ +4. 
│ [6.748,2.132,4.755,9.93] │ + └───────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md new file mode 100644 index 00000000000..68e78fc3bd8 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md @@ -0,0 +1,60 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/covarpopstable +sidebar_position: 36 +--- + +# covarPopStable + +Calculates the value of the population covariance: + +$$ +\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n} +$$ + +It is similar to the [covarPop](../reference/covarpop.md) function, but uses a numerically stable algorithm. As a result, `covarPopStable` is slower than `covarPop` but produces a more accurate result. + + +**Syntax** + +```sql +covarPopStable(x, y) +``` + +**Arguments** + +- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). +- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- The population covariance between `x` and `y`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS series; +CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory; +INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3); +``` + +```sql +SELECT covarPopStable(x_value, y_value) +FROM +( + SELECT + x_value, + y_value + FROM series +); +``` + +Result: + +```reference +┌─covarPopStable(x_value, y_value)─┐ +│ 6.485648 │ +└──────────────────────────────────┘ +``` + diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md index bdcc6c0e3d0..7d5d5d13f35 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md @@ -7,8 +7,74 @@ sidebar_position: 37 # covarSamp Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. -Returns Float64. When `n <= 1`, returns `nan`. - :::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error. +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarSampStable`](../reference/covarsampstable.md) function. It works slower but provides a lower computational error. ::: + +**Syntax** + +```sql +covarSamp(x, y) +``` + +**Arguments** + +- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). +- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- The sample covariance between `x` and `y`. For `n <= 1`, `nan` is returned. [Float64](../../data-types/float.md).
+ +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS series; +CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory; +INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3); +``` + +```sql +SELECT covarSamp(x_value, y_value) +FROM +( + SELECT + x_value, + y_value + FROM series +); +``` + +Result: + +```reference +┌─covarSamp(x_value, y_value)─┐ +│ 7.206275555555556 │ +└─────────────────────────────┘ +``` + +Query: + +```sql +SELECT covarSamp(x_value, y_value) +FROM +( + SELECT + x_value, + y_value + FROM series LIMIT 1 +); + +``` + +Result: + +```reference +┌─covarSamp(x_value, y_value)─┐ +│ nan │ +└─────────────────────────────┘ +``` + + diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md new file mode 100644 index 00000000000..b71d753f0be --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md @@ -0,0 +1,57 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/covarsampmatrix +sidebar_position: 38 +--- + +# covarSampMatrix + +Returns the sample covariance matrix over N variables. + +**Syntax** + +```sql +covarSampMatrix(x[, ...]) +``` + +**Arguments** + +- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- Sample covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test; +CREATE TABLE test +( + a UInt32, + b Float64, + c Float64, + d Float64 +) +ENGINE = Memory; +INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762); +``` + +```sql +SELECT arrayMap(x -> round(x, 3), arrayJoin(covarSampMatrix(a, b, c, d))) AS covarSampMatrix +FROM test; +``` + +Result: + +```reference + ┌─covarSampMatrix─────────────┐ +1. │ [9.167,-1.956,4.534,7.498] │ +2. │ [-1.956,45.634,7.206,2.369] │ +3. │ [4.534,7.206,38.011,5.283] │ +4. │ [7.498,2.369,5.283,11.034] │ + └─────────────────────────────┘ +``` + + diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md new file mode 100644 index 00000000000..3e6867b96d6 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md @@ -0,0 +1,73 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/covarsampstable +sidebar_position: 37 +--- + +# covarSampStable + +Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. Similar to [covarSamp](../reference/covarsamp.md) but works slower while providing a lower computational error. + +**Syntax** + +```sql +covarSampStable(x, y) +``` + +**Arguments** + +- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). +- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md). + +**Returned Value** + +- The sample covariance between `x` and `y`. 
For `n <= 1`, `inf` is returned. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS series; +CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory; +INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3); +``` + +```sql +SELECT covarSampStable(x_value, y_value) +FROM +( + SELECT + x_value, + y_value + FROM series +); +``` + +Result: + +```reference +┌─covarSampStable(x_value, y_value)─┐ +│ 7.206275555555556 │ +└───────────────────────────────────┘ +``` + +Query: + +```sql +SELECT covarSampStable(x_value, y_value) +FROM +( + SELECT + x_value, + y_value + FROM series LIMIT 1 +); +``` + +Result: + +```reference +┌─covarSampStable(x_value, y_value)─┐ +│ inf │ +└───────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index cfcca1bb436..a56b1c97681 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -9,106 +9,116 @@ toc_hidden: true Standard aggregate functions: -- [count](/docs/en/sql-reference/aggregate-functions/reference/count.md) -- [min](/docs/en/sql-reference/aggregate-functions/reference/min.md) -- [max](/docs/en/sql-reference/aggregate-functions/reference/max.md) -- [sum](/docs/en/sql-reference/aggregate-functions/reference/sum.md) -- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md) -- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md) -- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md) -- [stddevPopStable](/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md) -- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md) -- [stddevSampStable](/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md) -- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md) -- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md) -- [corr](./corr.md) -- [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md) -- [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md) -- [entropy](./entropy.md) -- [exponentialMovingAverage](./exponentialmovingaverage.md) -- [intervalLengthSum](./intervalLengthSum.md) -- [kolmogorovSmirnovTest](./kolmogorovsmirnovtest.md) -- [mannwhitneyutest](./mannwhitneyutest.md) -- [median](./median.md) -- [rankCorr](./rankCorr.md) -- [sumKahan](./sumkahan.md) -- [studentTTest](./studentttest.md) -- [welchTTest](./welchttest.md) +- [count](../reference/count.md) +- [min](../reference/min.md) +- [max](../reference/max.md) +- [sum](../reference/sum.md) +- [avg](../reference/avg.md) +- [any](../reference/any.md) +- [stddevPop](../reference/stddevpop.md) +- [stddevPopStable](../reference/stddevpopstable.md) +- [stddevSamp](../reference/stddevsamp.md) +- [stddevSampStable](../reference/stddevsampstable.md) +- [varPop](../reference/varpop.md) +- [varSamp](../reference/varsamp.md) +- [corr](../reference/corr.md) +- [corr](../reference/corrstable.md) +- [corrMatrix](../reference/corrmatrix.md) +- [covarPop](../reference/covarpop.md) +- [covarStable](../reference/covarpopstable.md) +- [covarPopMatrix](../reference/covarpopmatrix.md) +- 
[covarSamp](../reference/covarsamp.md) +- [covarSampStable](../reference/covarsampstable.md) +- [covarSampMatrix](../reference/covarsampmatrix.md) +- [entropy](../reference/entropy.md) +- [exponentialMovingAverage](../reference/exponentialmovingaverage.md) +- [intervalLengthSum](../reference/intervalLengthSum.md) +- [kolmogorovSmirnovTest](../reference/kolmogorovsmirnovtest.md) +- [mannwhitneyutest](../reference/mannwhitneyutest.md) +- [median](../reference/median.md) +- [rankCorr](../reference/rankCorr.md) +- [sumKahan](../reference/sumkahan.md) +- [studentTTest](../reference/studentttest.md) +- [welchTTest](../reference/welchttest.md) ClickHouse-specific aggregate functions: -- [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md) -- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md) -- [boundingRatio](/docs/en/sql-reference/aggregate-functions/reference/boundrat.md) -- [first_value](/docs/en/sql-reference/aggregate-functions/reference/first_value.md) -- [last_value](/docs/en/sql-reference/aggregate-functions/reference/last_value.md) -- [argMin](/docs/en/sql-reference/aggregate-functions/reference/argmin.md) -- [argMax](/docs/en/sql-reference/aggregate-functions/reference/argmax.md) -- [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md) -- [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md) -- [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md) -- [deltaSum](./deltasum.md) -- [deltaSumTimestamp](./deltasumtimestamp.md) -- [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md) -- [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md) -- [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md) -- [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md) -- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) -- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) -- [groupArraySample](./grouparraysample.md) -- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md) -- [groupArrayIntersect](./grouparrayintersect.md) -- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) -- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) -- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) -- [groupBitmap](/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md) -- [groupBitmapAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md) -- [groupBitmapOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md) -- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md) -- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md) -- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md) -- [sumMapWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md) -- [sumMapFiltered](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfiltered) -- [sumMapFilteredWithOverflow](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfilteredwithoverflow) -- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md) -- 
[maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md) -- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md) -- [skewPop](/docs/en/sql-reference/aggregate-functions/reference/skewpop.md) -- [kurtSamp](/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md) -- [kurtPop](/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md) -- [uniq](/docs/en/sql-reference/aggregate-functions/reference/uniq.md) -- [uniqExact](/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md) -- [uniqCombined](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md) -- [uniqCombined64](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md) -- [uniqHLL12](/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md) -- [uniqTheta](/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md) -- [quantile](/docs/en/sql-reference/aggregate-functions/reference/quantile.md) -- [quantiles](/docs/en/sql-reference/aggregate-functions/reference/quantiles.md) -- [quantileExact](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md) -- [quantileExactLow](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow) -- [quantileExactHigh](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh) -- [quantileExactWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md) -- [quantileTiming](/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md) -- [quantileTimingWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md) -- [quantileDeterministic](/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md) -- [quantileTDigest](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md) -- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) -- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) -- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) -- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch) -- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md) -- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md) -- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) -- [categoricalInformationValue](/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) -- [contingency](./contingency.md) -- [cramersV](./cramersv.md) -- [cramersVBiasCorrected](./cramersvbiascorrected.md) -- [theilsU](./theilsu.md) -- [maxIntersections](./maxintersections.md) -- [maxIntersectionsPosition](./maxintersectionsposition.md) -- [meanZTest](./meanztest.md) -- [quantileGK](./quantileGK.md) -- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md) -- [sparkBar](./sparkbar.md) -- [sumCount](./sumcount.md) -- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md) +- [analysisOfVariance](../reference/analysis_of_variance.md) +- [any](../reference/any_respect_nulls.md) +- [anyHeavy](../reference/anyheavy.md) +- [anyLast](../reference/anylast.md) +- [anyLast](../reference/anylast_respect_nulls.md) +- 
[boundingRatio](../reference/boundrat.md) +- [first_value](../reference/first_value.md) +- [last_value](../reference/last_value.md) +- [argMin](../reference/argmin.md) +- [argMax](../reference/argmax.md) +- [avgWeighted](../reference/avgweighted.md) +- [topK](../reference/topk.md) +- [topKWeighted](../reference/topkweighted.md) +- [deltaSum](../reference/deltasum.md) +- [deltaSumTimestamp](../reference/deltasumtimestamp.md) +- [groupArray](../reference/grouparray.md) +- [groupArrayLast](../reference/grouparraylast.md) +- [groupUniqArray](../reference/groupuniqarray.md) +- [groupArrayInsertAt](../reference/grouparrayinsertat.md) +- [groupArrayMovingAvg](../reference/grouparraymovingavg.md) +- [groupArrayMovingSum](../reference/grouparraymovingsum.md) +- [groupArraySample](../reference/grouparraysample.md) +- [groupArraySorted](../reference/grouparraysorted.md) +- [groupArrayIntersect](../reference/grouparrayintersect.md) +- [groupBitAnd](../reference/groupbitand.md) +- [groupBitOr](../reference/groupbitor.md) +- [groupBitXor](../reference/groupbitxor.md) +- [groupBitmap](../reference/groupbitmap.md) +- [groupBitmapAnd](../reference/groupbitmapand.md) +- [groupBitmapOr](../reference/groupbitmapor.md) +- [groupBitmapXor](../reference/groupbitmapxor.md) +- [sumWithOverflow](../reference/sumwithoverflow.md) +- [sumMap](../reference/summap.md) +- [sumMapWithOverflow](../reference/summapwithoverflow.md) +- [sumMapFiltered](../parametric-functions.md/#summapfiltered) +- [sumMapFilteredWithOverflow](../parametric-functions.md/#summapfilteredwithoverflow) +- [minMap](../reference/minmap.md) +- [maxMap](../reference/maxmap.md) +- [skewSamp](../reference/skewsamp.md) +- [skewPop](../reference/skewpop.md) +- [kurtSamp](../reference/kurtsamp.md) +- [kurtPop](../reference/kurtpop.md) +- [uniq](../reference/uniq.md) +- [uniqExact](../reference/uniqexact.md) +- [uniqCombined](../reference/uniqcombined.md) +- [uniqCombined64](../reference/uniqcombined64.md) +- [uniqHLL12](../reference/uniqhll12.md) +- [uniqTheta](../reference/uniqthetasketch.md) +- [quantile](../reference/quantile.md) +- [quantiles](../reference/quantiles.md) +- [quantileExact](../reference/quantileexact.md) +- [quantileExactLow](../reference/quantileexact.md#quantileexactlow) +- [quantileExactHigh](../reference/quantileexact.md#quantileexacthigh) +- [quantileExactWeighted](../reference/quantileexactweighted.md) +- [quantileTiming](../reference/quantiletiming.md) +- [quantileTimingWeighted](../reference/quantiletimingweighted.md) +- [quantileDeterministic](../reference/quantiledeterministic.md) +- [quantileTDigest](../reference/quantiletdigest.md) +- [quantileTDigestWeighted](../reference/quantiletdigestweighted.md) +- [quantileBFloat16](../reference/quantilebfloat16.md#quantilebfloat16) +- [quantileBFloat16Weighted](../reference/quantilebfloat16.md#quantilebfloat16weighted) +- [quantileDD](../reference/quantileddsketch.md#quantileddsketch) +- [simpleLinearRegression](../reference/simplelinearregression.md) +- [singleValueOrNull](../reference/singlevalueornull.md) +- [stochasticLinearRegression](../reference/stochasticlinearregression.md) +- [stochasticLogisticRegression](../reference/stochasticlogisticregression.md) +- [categoricalInformationValue](../reference/categoricalinformationvalue.md) +- [contingency](../reference/contingency.md) +- [cramersV](../reference/cramersv.md) +- [cramersVBiasCorrected](../reference/cramersvbiascorrected.md) +- [theilsU](../reference/theilsu.md) +- [maxIntersections](../reference/maxintersections.md) +- 
[maxIntersectionsPosition](../reference/maxintersectionsposition.md) +- [meanZTest](../reference/meanztest.md) +- [quantileGK](../reference/quantileGK.md) +- [quantileInterpolatedWeighted](../reference/quantileinterpolatedweighted.md) +- [sparkBar](../reference/sparkbar.md) +- [sumCount](../reference/sumcount.md) +- [largestTriangleThreeBuckets](../reference/largestTriangleThreeBuckets.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index e2a5bc53e32..856d447ac13 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles -Syntax: `quantiles(level1, level2, …)(x)` +Syntax: `quantiles(level1, level2, ...)(x)` All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md new file mode 100644 index 00000000000..e39af77059a --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md @@ -0,0 +1,57 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/singlevalueornull +sidebar_position: 220 +--- + +# singleValueOrNull + +The aggregate function `singleValueOrNull` is used to implement subquery operators, such as `x = ALL (SELECT ...)`. It checks if there is only one unique non-NULL value in the data. +If there is only one unique value, it returns it. If there are zero or at least two distinct values, it returns NULL. + +**Syntax** + +``` sql +singleValueOrNull(x) +``` + +**Parameters** + +- `x` — Column of any [data type](../../data-types/index.md). + +**Returned values** + +- The unique value, if there is only one unique non-NULL value in `x`. +- `NULL`, if there are zero or at least two distinct values. + +**Examples** + +Query: + +``` sql +CREATE TABLE test (x UInt8 NULL) ENGINE=Log; +INSERT INTO test (x) VALUES (NULL), (NULL), (5), (NULL), (NULL); +SELECT singleValueOrNull(x) FROM test; +``` + +Result: + +```response +┌─singleValueOrNull(x)─┐ +│ 5 │ +└──────────────────────┘ +``` + +Query: + +```sql +INSERT INTO test (x) VALUES (10); +SELECT singleValueOrNull(x) FROM test; +``` + +Result: + +```response +┌─singleValueOrNull(x)─┐ +│ ᴺᵁᴸᴸ │ +└──────────────────────┘ +``` diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index 87511a505dc..37f0d0e50ae 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction -Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(…)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. 
To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge`suffix. +Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(...)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge`suffix. -`AggregateFunction(name, types_of_arguments…)` — parametric data type. +`AggregateFunction(name, types_of_arguments...)` — parametric data type. **Parameters** diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index 4c59bd947de..6fcbc218c5d 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/data-types/boolean sidebar_position: 22 -sidebar_label: Boolean +sidebar_label: Bool --- # Bool diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md new file mode 100644 index 00000000000..955fd54e641 --- /dev/null +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -0,0 +1,495 @@ +--- +slug: /en/sql-reference/data-types/dynamic +sidebar_position: 56 +sidebar_label: Dynamic +--- + +# Dynamic + +This type allows to store values of any type inside it without knowing all of them in advance. + +To declare a column of `Dynamic` type, use the following syntax: + +``` sql + Dynamic(max_types=N) +``` + +Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. + +:::note +The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`. +::: + +## Creating Dynamic + +Using `Dynamic` type in table column definition: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM test; +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ Hello, World! │ String │ +│ [1,2,3] │ Array(Int64) │ +└───────────────┴────────────────┘ +``` + +Using CAST from ordinary column: + +```sql +SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ Hello, World! │ String │ +└───────────────┴────────────────┘ +``` + +Using CAST from `Variant` column: + +```sql +SET allow_experimental_variant_type = 1, use_variant_as_common_type = 1; +SELECT multiIf((number % 3) = 0, number, (number % 3) = 1, range(number + 1), NULL)::Dynamic AS d, dynamicType(d) FROM numbers(3) +``` + +```text +┌─d─────┬─dynamicType(d)─┐ +│ 0 │ UInt64 │ +│ [0,1] │ Array(UInt64) │ +│ ᴺᵁᴸᴸ │ None │ +└───────┴────────────────┘ +``` + + +## Reading Dynamic nested types as subcolumns + +`Dynamic` type supports reading a single nested type from a `Dynamic` column using the type name as a subcolumn. 
+So, if you have column `d Dynamic` you can read a subcolumn of any valid type `T` using syntax `d.T`, +this subcolumn will have type `Nullable(T)` if `T` can be inside `Nullable` and `T` otherwise. This subcolumn will +be the same size as original `Dynamic` column and will contain `NULL` values (or empty values if `T` cannot be inside `Nullable`) +in all rows in which original `Dynamic` column doesn't have type `T`. + +`Dynamic` subcolumns can be also read using function `dynamicElement(dynamic_column, type_name)`. + +Examples: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d), d.String, d.Int64, d.`Array(Int64)`, d.Date, d.`Array(String)` FROM test; +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─d.String──────┬─d.Int64─┬─d.Array(Int64)─┬─d.Date─┬─d.Array(String)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │ +└───────────────┴────────────────┴───────────────┴─────────┴────────────────┴────────┴─────────────────┘ +``` + +```sql +SELECT toTypeName(d.String), toTypeName(d.Int64), toTypeName(d.`Array(Int64)`), toTypeName(d.Date), toTypeName(d.`Array(String)`) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(d.String)─┬─toTypeName(d.Int64)─┬─toTypeName(d.Array(Int64))─┬─toTypeName(d.Date)─┬─toTypeName(d.Array(String))─┐ +│ Nullable(String) │ Nullable(Int64) │ Array(Int64) │ Nullable(Date) │ Array(String) │ +└──────────────────────┴─────────────────────┴────────────────────────────┴────────────────────┴─────────────────────────────┘ +``` + +```sql +SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;``` +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │ +└───────────────┴────────────────┴─────────────────────────────┴────────────────────────────┴───────────────────────────────────┴───────────────────────────┴────────────────────────────────────┘ +``` + +To know what variant is stored in each row function `dynamicType(dynamic_column)` can be used. It returns `String` with value type name for each row (or `'None'` if row is `NULL`). + +Example: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT dynamicType(d) from test; +``` + +```text +┌─dynamicType(d)─┐ +│ None │ +│ Int64 │ +│ String │ +│ Array(Int64) │ +└────────────────┘ +``` + +## Conversion between Dynamic column and other columns + +There are 4 possible conversions that can be performed with `Dynamic` column. + +### Converting an ordinary column to a Dynamic column + +```sql +SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ Hello, World! 
│ String │ +└───────────────┴────────────────┘ +``` + +### Converting a String column to a Dynamic column through parsing + +To parse `Dynamic` type values from a `String` column you can enable setting `cast_string_to_dynamic_use_inference`: + +```sql +SET cast_string_to_dynamic_use_inference = 1; +SELECT CAST(materialize(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01')), 'Map(String, Dynamic)') as map_of_dynamic, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamic) as map_of_dynamic_types; +``` + +```text +┌─map_of_dynamic──────────────────────────────┬─map_of_dynamic_types─────────────────────────┐ +│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'Int64','key2':'Bool','key3':'Date'} │ +└─────────────────────────────────────────────┴──────────────────────────────────────────────┘ +``` + +### Converting a Dynamic column to an ordinary column + +It is possible to convert a `Dynamic` column to an ordinary column. In this case all nested types will be converted to a destination type: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('42.42'), (true), ('e10'); +SELECT d::Nullable(Float64) FROM test; +``` + +```text +┌─CAST(d, 'Nullable(Float64)')─┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ 42.42 │ +│ 1 │ +│ 0 │ +└──────────────────────────────┘ +``` + +### Converting a Variant column to Dynamic column + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('String'), ([1, 2, 3]); +SELECT v::Dynamic as d, dynamicType(d) from test; +``` + +```text +┌─d───────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ UInt64 │ +│ String │ String │ +│ [1,2,3] │ Array(UInt64) │ +└─────────┴────────────────┘ +``` + +### Converting a Dynamic(max_types=N) column to another Dynamic(max_types=K) + +If `K >= N` than during conversion the data doesn't change: + +```sql +CREATE TABLE test (d Dynamic(max_types=3)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true); +SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d─────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ 43 │ Int64 │ +│ 42.42 │ String │ +│ true │ Bool │ +└───────┴────────────────┘ +``` + +If `K < N`, then the values with the rarest types are converted to `String`: +```text +CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); +SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ 42 │ Int64 │ +│ 43 │ Int64 │ 43 │ Int64 │ +│ 42.42 │ String │ 42.42 │ String │ +│ true │ Bool │ true │ String │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ +└─────────┴────────────────┴─────────┴─────────────────┘ +``` + +If `K=1`, all types are converted to `String`: + +```text +CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); +SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ 42 │ String │ +│ 43 │ Int64 │ 43 │ String │ +│ 42.42 │ String │ 42.42 │ String │ +│ true │ Bool │ true │ String │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ +└─────────┴────────────────┴─────────┴─────────────────┘ +``` + +## Reading Dynamic type 
from the data + +All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) supports reading `Dynamic` type. During data parsing ClickHouse tries to infer the type of each value and use it during insertion to `Dynamic` column. + +Example: + +```sql +SELECT + d, + dynamicType(d), + dynamicElement(d, 'String') AS str, + dynamicElement(d, 'Int64') AS num, + dynamicElement(d, 'Float64') AS float, + dynamicElement(d, 'Date') AS date, + dynamicElement(d, 'Array(Int64)') AS arr +FROM format(JSONEachRow, 'd Dynamic', $$ +{"d" : "Hello, World!"}, +{"d" : 42}, +{"d" : 42.42}, +{"d" : "2020-01-01"}, +{"d" : [1, 2, 3]} +$$) +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─str───────────┬──num─┬─float─┬───────date─┬─arr─────┐ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42.42 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ +│ 2020-01-01 │ Date │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴────────────────┴───────────────┴──────┴───────┴────────────┴─────────┘ +``` + +## Comparing values of Dynamic type + +Values of `Dynamic` types are compared similar to values of `Variant` type: +The result of operator `<` for values `d1` with underlying type `T1` and `d2` with underlying type `T2` of a type `Dynamic` is defined as follows: +- If `T1 = T2 = T`, the result will be `d1.T < d2.T` (underlying values will be compared). +- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared). + +Examples: +```sql +CREATE TABLE test (d1 Dynamic, d2 Dynamic) ENGINE=Memory; +INSERT INTO test VALUES (42, 42), (42, 43), (42, 'abc'), (42, [1, 2, 3]), (42, []), (42, NULL); +``` + +```sql +SELECT d2, dynamicType(d2) as d2_type from test order by d2; +``` + +```text +┌─d2──────┬─d2_type──────┐ +│ [] │ Array(Int64) │ +│ [1,2,3] │ Array(Int64) │ +│ 42 │ Int64 │ +│ 43 │ Int64 │ +│ abc │ String │ +│ ᴺᵁᴸᴸ │ None │ +└─────────┴──────────────┘ +``` + +```sql +SELECT d1, dynamicType(d1) as d1_type, d2, dynamicType(d2) as d2_type, d1 = d2, d1 < d2, d1 > d2 from test; +``` + +```text +┌─d1─┬─d1_type─┬─d2──────┬─d2_type──────┬─equals(d1, d2)─┬─less(d1, d2)─┬─greater(d1, d2)─┐ +│ 42 │ Int64 │ 42 │ Int64 │ 1 │ 0 │ 0 │ +│ 42 │ Int64 │ 43 │ Int64 │ 0 │ 1 │ 0 │ +│ 42 │ Int64 │ abc │ String │ 0 │ 1 │ 0 │ +│ 42 │ Int64 │ [1,2,3] │ Array(Int64) │ 0 │ 0 │ 1 │ +│ 42 │ Int64 │ [] │ Array(Int64) │ 0 │ 0 │ 1 │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ None │ 0 │ 1 │ 0 │ +└────┴─────────┴─────────┴──────────────┴────────────────┴──────────────┴─────────────────┘ +``` + +If you need to find the row with specific `Dynamic` value, you can do one of the following: + +- Cast value to the `Dynamic` type: + +```sql +SELECT * FROM test WHERE d2 == [1,2,3]::Array(UInt32)::Dynamic; +``` + +```text +┌─d1─┬─d2──────┐ +│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +- Compare `Dynamic` subcolumn with required type: + +```sql +SELECT * FROM test WHERE d2.`Array(Int65)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') +``` + +```text +┌─d1─┬─d2──────┐ +│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +Sometimes it can be useful to make additional check on dynamic type as subcolumns with complex types like `Array/Map/Tuple` cannot be inside `Nullable` and will have default values instead of `NULL` on rows with different types: + +```sql +SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE d2.`Array(Int64)` == []; +``` + +```text +┌─d2───┬─d2.Array(UInt32)─┬─dynamicType(d2)─┐ +│ 42 │ [] │ Int64 │ +│ 
43 │ [] │ Int64 │ +│ abc │ [] │ String │ +│ [] │ [] │ Array(Int32) │ +│ ᴺᵁᴸᴸ │ [] │ None │ +└──────┴──────────────────┴─────────────────┘ +``` + +```sql +SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE dynamicType(d2) == 'Array(Int64)' AND d2.`Array(Int64)` == []; +``` + +```text +┌─d2─┬─d2.Array(UInt32)─┬─dynamicType(d2)─┐ +│ [] │ [] │ Array(Int64) │ +└────┴──────────────────┴─────────────────┘ +``` + +**Note:** values of dynamic types with different numeric types are considered as different values and not compared between each other, their type names are compared instead. + +Example: + +```sql +CREATE TABLE test (d Dynamic) ENGINE=Memory; +INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64); +SELECT d, dynamicType(d) FROM test ORDER by d; +``` + +```text +┌─v───┬─dynamicType(v)─┐ +│ 1 │ Int64 │ +│ 100 │ Int64 │ +│ 1 │ UInt32 │ +│ 100 │ UInt32 │ +└─────┴────────────────┘ +``` + +## Reaching the limit in number of different data types stored inside Dynamic + +`Dynamic` data type can store only limited number of different data types inside. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic). +When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values. + +Let's see what happens when the limit is reached in different scenarios. + +### Reaching the limit during data parsing + +During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values: + +```sql +SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', ' +{"d" : 42} +{"d" : [1, 2, 3]} +{"d" : "Hello, World!"} +{"d" : "2020-01-01"} +{"d" : ["str1", "str2", "str3"]} +{"d" : {"a" : 1, "b" : [1, 2, 3]}} +') +``` + +```text +┌─d──────────────────────────┬─dynamicType(d)─┐ +│ 42 │ Int64 │ +│ [1,2,3] │ Array(Int64) │ +│ Hello, World! │ String │ +│ 2020-01-01 │ String │ +│ ["str1", "str2", "str3"] │ String │ +│ {"a" : 1, "b" : [1, 2, 3]} │ String │ +└────────────────────────────┴────────────────┘ +``` + +As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`. + +### During merges of data parts in MergeTree table engines + +During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts. +In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation. + +Let's see an example of such merge. 
First, let's create a table with a `Dynamic` column, set the limit of different data types to `3`, and insert values with `5` different types:
+
+```sql
+CREATE TABLE test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree ORDER BY id;
+SYSTEM STOP MERGES test;
+INSERT INTO test SELECT number, number FROM numbers(5);
+INSERT INTO test SELECT number, range(number) FROM numbers(4);
+INSERT INTO test SELECT number, toDate(number) FROM numbers(3);
+INSERT INTO test SELECT number, map(number, number) FROM numbers(2);
+INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1);
+```
+
+Each insert will create a separate data part with a `Dynamic` column containing a single type:
+```sql
+SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
+```
+
+```text
+┌─count()─┬─dynamicType(d)──────┬─_part─────┐
+│       5 │ UInt64              │ all_1_1_0 │
+│       4 │ Array(UInt64)       │ all_2_2_0 │
+│       3 │ Date                │ all_3_3_0 │
+│       2 │ Map(UInt64, UInt64) │ all_4_4_0 │
+│       1 │ String              │ all_5_5_0 │
+└─────────┴─────────────────────┴───────────┘
+```
+
+Now, let's merge all parts into one and see what will happen:
+
+```sql
+SYSTEM START MERGES test;
+OPTIMIZE TABLE test FINAL;
+SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
+```
+
+```text
+┌─count()─┬─dynamicType(d)─┬─_part─────┐
+│       5 │ UInt64         │ all_1_5_2 │
+│       6 │ String         │ all_1_5_2 │
+│       4 │ Array(UInt64)  │ all_1_5_2 │
+└─────────┴────────────────┴───────────┘
+```
+
+As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and cast all other types to `String`.
diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md
index 0316df7fe34..0c021b28f74 100644
--- a/docs/en/sql-reference/data-types/fixedstring.md
+++ b/docs/en/sql-reference/data-types/fixedstring.md
@@ -21,8 +21,8 @@ The `FixedString` type is efficient when data has the length of precisely `N` by
Examples of the values that can be efficiently stored in `FixedString`-typed columns:
- The binary representation of IP addresses (`FixedString(16)` for IPv6).
-- Language codes (ru_RU, en_US … ).
-- Currency codes (USD, RUB … ).
+- Language codes (ru_RU, en_US ... ).
+- Currency codes (USD, RUB ... ).
- Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256).
To store UUID values, use the [UUID](../../sql-reference/data-types/uuid.md) data type.
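A minimal sketch of the efficient case described above, i.e. a column of fixed-length two-letter language codes (the table and column names here are hypothetical, not taken from the ClickHouse documentation):

```sql
-- Two-letter ISO 639-1 language codes always occupy exactly 2 bytes
CREATE TABLE language_codes (code FixedString(2)) ENGINE = Memory;
INSERT INTO language_codes VALUES ('en'), ('ru');
-- Values shorter than N are padded with null bytes; values longer than N are rejected with an error
SELECT code, toTypeName(code) FROM language_codes;
```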
diff --git a/docs/en/sql-reference/data-types/ipv4.md b/docs/en/sql-reference/data-types/ipv4.md
index 637ed543e08..98ba9f4abac 100644
--- a/docs/en/sql-reference/data-types/ipv4.md
+++ b/docs/en/sql-reference/data-types/ipv4.md
@@ -57,6 +57,18 @@ SELECT toTypeName(from), hex(from) FROM hits LIMIT 1;
└──────────────────┴───────────┘
```
+IPv4 addresses can be directly compared to IPv6 addresses:
+
+```sql
+SELECT toIPv4('127.0.0.1') = toIPv6('::ffff:127.0.0.1');
+```
+
+```text
+┌─equals(toIPv4('127.0.0.1'), toIPv6('::ffff:127.0.0.1'))─┐
+│                                                       1 │
+└─────────────────────────────────────────────────────────┘
+```
+
**See Also**
- [Functions for Working with IPv4 and IPv6 Addresses](../functions/ip-address-functions.md)
diff --git a/docs/en/sql-reference/data-types/ipv6.md b/docs/en/sql-reference/data-types/ipv6.md
index 642a7db81fc..d3b7cc72a1a 100644
--- a/docs/en/sql-reference/data-types/ipv6.md
+++ b/docs/en/sql-reference/data-types/ipv6.md
@@ -57,6 +57,19 @@ SELECT toTypeName(from), hex(from) FROM hits LIMIT 1;
└──────────────────┴──────────────────────────────────┘
```
+IPv6 addresses can be directly compared to IPv4 addresses:
+
+```sql
+SELECT toIPv4('127.0.0.1') = toIPv6('::ffff:127.0.0.1');
+```
+
+```text
+┌─equals(toIPv4('127.0.0.1'), toIPv6('::ffff:127.0.0.1'))─┐
+│                                                       1 │
+└─────────────────────────────────────────────────────────┘
+```
+
+
**See Also**
- [Functions for Working with IPv4 and IPv6 Addresses](../functions/ip-address-functions.md)
diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index 2c734969afc..9f82c2f093a 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -6,100 +6,106 @@ sidebar_label: Map(K, V)
# Map(K, V)
-`Map(K, V)` data type stores `key:value` pairs.
+Data type `Map(K, V)` stores key-value pairs.
+
+Unlike other databases, ClickHouse does not require map keys to be unique, i.e. a map can contain two elements with the same key (see the example below).
+(The reason for that is that maps are internally implemented as `Array(Tuple(K, V))`.)
+
+You can use the syntax `m[k]` to obtain the value for key `k` in map `m`.
+Also, `m[k]` scans the map, i.e. the runtime of the operation is linear in the size of the map.
**Parameters**
-- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md).
-- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md).
-
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity.
+- `K` — The type of the Map keys. Arbitrary type except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md) types.
+- `V` — The type of the Map values. Arbitrary type.
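As a minimal sketch of the non-unique keys mentioned above (the literal key and values are arbitrary):

```sql
-- Both pairs with key 'key1' are kept, because a Map is stored as Array(Tuple(K, V))
SELECT map('key1', 1, 'key1', 2) AS m, mapKeys(m) AS keys, mapValues(m) AS vals;
```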
**Examples** -Consider the table: +Create a table with a column of type map: ``` sql -CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; -INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory; +INSERT INTO tab VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); ``` -Select all `key2` values: +To select `key2` values: ```sql -SELECT a['key2'] FROM table_map; +SELECT m['key2'] FROM tab; ``` + Result: ```text -┌─arrayElement(a, 'key2')─┐ +┌─arrayElement(m, 'key2')─┐ │ 10 │ │ 20 │ │ 30 │ └─────────────────────────┘ ``` -If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays. +If the requested key `k` is not contained in the map, `m[k]` returns the value type's default value, e.g. `0` for integer types and `''` for string types. +To check whether a key exists in a map, you can use function [mapContains](../../sql-reference/functions/tuple-map-functions#mapcontains). ```sql -INSERT INTO table_map VALUES ({'key3':100}), ({}); -SELECT a['key3'] FROM table_map; +CREATE TABLE tab (m Map(String, UInt64)) ENGINE=Memory; +INSERT INTO tab VALUES ({'key1':100}), ({}); +SELECT m['key1'] FROM tab; ``` Result: ```text -┌─arrayElement(a, 'key3')─┐ +┌─arrayElement(m, 'key1')─┐ │ 100 │ │ 0 │ └─────────────────────────┘ -┌─arrayElement(a, 'key3')─┐ -│ 0 │ -│ 0 │ -│ 0 │ -└─────────────────────────┘ ``` -## Convert Tuple to Map Type +## Converting Tuple to Map -You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function: +Values of type `Tuple()` can be casted to values of type `Map()` using function [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast): + +**Example** + +Query: ``` sql SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; ``` +Result: + ``` text ┌─map───────────────────────────┐ │ {1:'Ready',2:'Steady',3:'Go'} │ └───────────────────────────────┘ ``` -## Map.keys and Map.values Subcolumns +## Reading subcolumns of Map -To optimize `Map` column processing, in some cases you can use the `keys` and `values` subcolumns instead of reading the whole column. +To avoid reading the entire map, you can use subcolumns `keys` and `values` in some cases. **Example** Query: ``` sql -CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory; +CREATE TABLE tab (m Map(String, UInt64)) ENGINE = Memory; +INSERT INTO tab VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); -INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); - -SELECT a.keys FROM t_map; - -SELECT a.values FROM t_map; +SELECT m.keys FROM tab; -- same as mapKeys(m) +SELECT m.values FROM tab; -- same as mapValues(m) ``` Result: ``` text -┌─a.keys─────────────────┐ +┌─m.keys─────────────────┐ │ ['key1','key2','key3'] │ └────────────────────────┘ -┌─a.values─┐ +┌─m.values─┐ │ [1,2,3] │ └──────────┘ ``` diff --git a/docs/en/sql-reference/data-types/nested-data-structures/index.md b/docs/en/sql-reference/data-types/nested-data-structures/index.md index d118170cd39..579ee9bfa8b 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/index.md @@ -6,7 +6,7 @@ sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) # Nested -## Nested(name1 Type1, Name2 Type2, …) +## Nested(name1 Type1, Name2 Type2, ...) 
A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 39f8409c1e1..4fb74ac30e4 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -5,7 +5,7 @@ sidebar_label: SimpleAggregateFunction --- # SimpleAggregateFunction -`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data. +`SimpleAggregateFunction(name, types_of_arguments...)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data. The common way to produce an aggregate function value is by calling the aggregate function with the [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate) suffix. diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 6d95f3dc358..e3fb1d91c05 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -77,7 +77,7 @@ Alias: `a * b` (operator) ## divide -Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function. +Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../data-types/float.md). Integer division is provided by the `intDiv` function. Division by 0 returns `inf`, `-inf`, or `nan`. @@ -140,11 +140,75 @@ Same as `intDiv` but returns zero when dividing by zero or when dividing a minim intDivOrZero(a, b) ``` +## isFinite + +Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. + +**Syntax** + +```sql +isFinite(x) +``` + +## isInfinite + +Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. 
+
+**Syntax**
+
+```sql
+isInfinite(x)
+```
+
+## ifNotFinite
+
+Checks whether a floating point value is finite and returns it if so; otherwise returns the fallback value.
+
+**Syntax**
+
+```sql
+ifNotFinite(x,y)
+```
+
+**Arguments**
+
+- `x` — Value to check for infinity. [Float\*](../data-types/float.md).
+- `y` — Fallback value. [Float\*](../data-types/float.md).
+
+**Returned value**
+
+- `x` if `x` is finite.
+- `y` if `x` is not finite.
+
+**Example**
+
+Query:
+
+    SELECT 1/0 as infimum, ifNotFinite(infimum,42)
+
+Result:
+
+    ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐
+    │     inf │                            42 │
+    └─────────┴───────────────────────────────┘
+
+You can get a similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`.
+
+## isNaN
+
+Returns 1 if the Float32 or Float64 argument is NaN, otherwise this function returns 0.
+
+**Syntax**
+
+```sql
+isNaN(x)
+```
+
## modulo
Calculates the remainder of the division of two values `a` by `b`.
-The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md).
+The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../data-types/float.md).
The remainder is computed like in C++. Truncated division is used for negative numbers.
@@ -248,7 +312,7 @@ lcm(a, b)
## max2
-Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md).
+Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md).
**Syntax**
@@ -274,7 +338,7 @@ Result:
## min2
-Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md).
+Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md).
**Syntax**
@@ -300,7 +364,7 @@ Result:
## multiplyDecimal
-Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).
+Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md).
The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values.
@@ -314,15 +378,13 @@ multiplyDecimal(a, b[, result_scale])
**Arguments**
-- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md).
+- `a` — First value. [Decimal](../data-types/decimal.md).
+- `b` — Second value. [Decimal](../data-types/decimal.md).
+- `result_scale` — Scale of result. [Int/UInt](../data-types/int-uint.md).
**Returned value**
-- The result of multiplication with given scale.
-
-Type: [Decimal256](../../sql-reference/data-types/decimal.md).
+- The result of multiplication with given scale. [Decimal256](../data-types/decimal.md).
**Example**
@@ -376,7 +438,7 @@ Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal m
## divideDecimal
-Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).
+Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md).
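For instance, a minimal sketch with the result scale fixed explicitly (the optional third argument is described just below); the expected result is `-5.71428`:

```sql
-- -12.0 divided by 2.1, keeping 5 decimal places in the result
SELECT divideDecimal(toDecimal64(-12, 1), toDecimal32(2.1, 1), 5);
```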
The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values. @@ -390,15 +452,13 @@ divideDecimal(a, b[, result_scale]) **Arguments** -- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). -- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). -- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `a` — First value: [Decimal](../data-types/decimal.md). +- `b` — Second value: [Decimal](../data-types/decimal.md). +- `result_scale` — Scale of result: [Int/UInt](../data-types/int-uint.md). **Returned value** -- The result of division with given scale. - -Type: [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of division with given scale. [Decimal256](../data-types/decimal.md). **Example** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 87e733a4b0c..7b52fbff714 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -19,7 +19,7 @@ empty([x]) An array is considered empty if it does not contain any elements. :::note -Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. +Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. ::: The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty). @@ -30,9 +30,7 @@ The function also works for [strings](string-functions.md#empty) or [UUID](uuid- **Returned value** -- Returns `1` for an empty array or `0` for a non-empty array. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty array or `0` for a non-empty array. [UInt8](../data-types/int-uint.md). **Example** @@ -63,7 +61,7 @@ notEmpty([x]) An array is considered non-empty if it contains at least one element. :::note -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. 
The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. ::: The function also works for [strings](string-functions.md#notempty) or [UUID](uuid-functions.md#notempty). @@ -74,9 +72,7 @@ The function also works for [strings](string-functions.md#notempty) or [UUID](uu **Returned value** -- Returns `1` for a non-empty array or `0` for an empty array. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty array or `0` for an empty array. [UInt8](../data-types/int-uint.md). **Example** @@ -100,7 +96,7 @@ Returns the number of items in the array. The result type is UInt64. The function also works for strings. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. Alias: `OCTET_LENGTH` @@ -561,7 +557,7 @@ Result: └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] +## array(x1, ...), operator \[x1, ...\] Creates an array from the function arguments. The arguments must be constants and have types that have the smallest common type. At least one argument must be passed, because otherwise it isn’t clear which type of array to create. That is, you can’t use this function to create an empty array (to do that, use the ‘emptyArray\*’ function described above). @@ -581,7 +577,7 @@ arrayConcat(arrays) **Arguments** -- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. +- `arrays` – Arbitrary number of arguments of [Array](../data-types/array.md) type. **Example** @@ -768,9 +764,9 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Elements set to `NULL` are handled as normal values. -## arrayCount(\[func,\] arr1, …) +## arrayCount(\[func,\] arr1, ...) -Returns the number of elements for which `func(arr1[i], …, arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. +Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. @@ -797,9 +793,11 @@ The sizes of the two vectors must be equal. Arrays and Tuples may also contain m **Returned value** -- The dot product of the two vectors. +- The dot product of the two vectors. [Numeric](https://clickhouse.com/docs/en/native-protocol/columns#numeric-types). -Type: numeric - determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. 
+:::note +The return type is determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. +::: **Examples** @@ -847,7 +845,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) -Returns the array \[1, 2, 3, …, length (arr) \] +Returns the array \[1, 2, 3, ..., length (arr) \] This function is normally used with ARRAY JOIN. It allows counting something just once for each array after applying ARRAY JOIN. Example: @@ -887,7 +885,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) This function can also be used in higher-order functions. For example, you can use it to get array indexes for elements that match a condition. -## arrayEnumerateUniq(arr, …) +## arrayEnumerateUniq(arr, ...) Returns an array the same size as the source array, indicating for each element what its position is among elements with the same value. For example: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -1060,7 +1058,7 @@ arrayPushBack(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1085,7 +1083,7 @@ arrayPushFront(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1181,14 +1179,12 @@ arrayShingles(array, length) **Arguments** -- `array` — Input array [Array](../../sql-reference/data-types/array.md). +- `array` — Input array [Array](../data-types/array.md). - `length` — The length of each shingle. **Returned value** -- An array of generated shingles. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array of generated shingles. 
[Array](../data-types/array.md). **Examples** @@ -1206,7 +1202,7 @@ Result: └───────────────────┘ ``` -## arraySort(\[func,\] arr, …) {#sort} +## arraySort(\[func,\] arr, ...) {#sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -1307,11 +1303,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. ::: -## arrayPartialSort(\[func,\] limit, arr, …) +## arrayPartialSort(\[func,\] limit, arr, ...) Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. -## arrayReverseSort(\[func,\] arr, …) {#reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -1412,7 +1408,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) +## arrayPartialReverseSort(\[func,\] limit, arr, ...) Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. @@ -1535,7 +1531,7 @@ Result: [3,9,1,4,5,6,7,8,2,10] ``` -## arrayUniq(arr, …) +## arrayUniq(arr, ...) If one argument is passed, it counts the number of different elements in the array. If multiple arguments are passed, it counts the number of different tuples of elements at corresponding positions in multiple arrays. @@ -1562,9 +1558,7 @@ arrayDifference(array) **Returned values** -Returns an array of differences between adjacent array elements. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +Returns an array of differences between adjacent array elements. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). **Example** @@ -1766,8 +1760,8 @@ arrayReduce(agg_func, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. 
+- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). +- `arr` — Any number of [array](../data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** @@ -1835,15 +1829,13 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. -- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). +- `ranges` — The ranges to aggretate which should be an [array](../data-types/array.md) of [tuples](../data-types/tuple.md) which containing the index and the length of each range. +- `arr` — Any number of [Array](../data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** -- Array containing results of the aggregate function over specified ranges. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing results of the aggregate function over specified ranges. [Array](../data-types/array.md). **Example** @@ -1956,7 +1948,7 @@ Alias: `flatten`. **Parameters** -- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Array](../data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. **Examples** @@ -1982,13 +1974,11 @@ arrayCompact(arr) **Arguments** -`arr` — The [array](../../sql-reference/data-types/array.md) to inspect. +`arr` — The [array](../data-types/array.md) to inspect. **Returned value** -The array without duplicate. - -Type: `Array`. +The array without duplicate. [Array](../data-types/array.md). **Example** @@ -2018,15 +2008,13 @@ arrayZip(arr1, arr2, ..., arrN) **Arguments** -- `arrN` — [Array](../../sql-reference/data-types/array.md). +- `arrN` — [Array](../data-types/array.md). The function can take any number of arrays of different types. All the input arrays must be of equal size. **Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../data-types/array.md). **Example** @@ -2079,9 +2067,9 @@ Result: └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) +## arrayMap(func, arr1, ...) -Returns an array obtained from the original arrays by application of `func(arr1[i], …, arrN[i])` for each element. Arrays `arr1` … `arrN` must have the same number of elements. +Returns an array obtained from the original arrays by application of `func(arr1[i], ..., arrN[i])` for each element. Arrays `arr1` ... `arrN` must have the same number of elements. 
Examples: @@ -2109,9 +2097,9 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res Note that the `arrayMap` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayFilter(func, arr1, …) +## arrayFilter(func, arr1, ...) -Returns an array containing only the elements in `arr1` for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns an array containing only the elements in `arr1` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Examples: @@ -2142,9 +2130,9 @@ SELECT Note that the `arrayFilter` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayFill(func, arr1, …) +## arrayFill(func, arr1, ...) -Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], …, arrN[i])` returns 0. The first element of `arr1` will not be replaced. +Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The first element of `arr1` will not be replaced. Examples: @@ -2160,9 +2148,9 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Note that the `arrayFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseFill(func, arr1, …) +## arrayReverseFill(func, arr1, ...) -Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], …, arrN[i])` returns 0. The last element of `arr1` will not be replaced. +Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The last element of `arr1` will not be replaced. Examples: @@ -2178,9 +2166,9 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Note that the `arrayReverseFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arraySplit(func, arr1, …) +## arraySplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. +Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. Examples: @@ -2196,9 +2184,9 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arraySplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseSplit(func, arr1, …) +## arrayReverseSplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. 
+Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. Examples: @@ -2214,30 +2202,30 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arrayReverseSplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayExists(\[func,\] arr1, …) +## arrayExists(\[func,\] arr1, ...) -Returns 1 if there is at least one element in `arr` for which `func(arr1[i], …, arrN[i])` returns something other than 0. Otherwise, it returns 0. +Returns 1 if there is at least one element in `arr` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Otherwise, it returns 0. Note that the `arrayExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayAll(\[func,\] arr1, …) +## arrayAll(\[func,\] arr1, ...) -Returns 1 if `func(arr1[i], …, arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. +Returns 1 if `func(arr1[i], ..., arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. Note that the `arrayAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayFirst(func, arr1, …) +## arrayFirst(func, arr1, ...) -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. ## arrayFirstOrNull -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise it returns `NULL`. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise it returns `NULL`. **Syntax** ```sql -arrayFirstOrNull(func, arr1, …) +arrayFirstOrNull(func, arr1, ...) ``` **Parameters** @@ -2292,20 +2280,20 @@ Result: \N ``` -## arrayLast(func, arr1, …) +## arrayLast(func, arr1, ...) -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. ## arrayLastOrNull -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise returns `NULL`. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise returns `NULL`. **Syntax** ```sql -arrayLastOrNull(func, arr1, …) +arrayLastOrNull(func, arr1, ...) ``` **Parameters** @@ -2348,15 +2336,15 @@ Result: \N ``` -## arrayFirstIndex(func, arr1, …) +## arrayFirstIndex(func, arr1, ...) -Returns the index of the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. 
+Returns the index of the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayLastIndex(func, arr1, …) +## arrayLastIndex(func, arr1, ...) -Returns the index of the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the index of the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayLastIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. @@ -2376,14 +2364,16 @@ arrayMin([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** - The minimum of function values (or the array minimum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +If `func` is specified, then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. +::: **Examples** @@ -2431,14 +2421,16 @@ arrayMax([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** - The maximum of function values (or the array maximum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +if `func` is specified then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. +::: **Examples** @@ -2486,14 +2478,21 @@ arraySum([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** - The sum of the function values (or the array sum). -Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). +:::note +Return type: + +- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../data-types/decimal.md). +- For floating point numbers — [Float64](../data-types/float.md). +- For numeric unsigned — [UInt64](../data-types/int-uint.md). +- For numeric signed — [Int64](../data-types/int-uint.md). 
+::: **Examples** @@ -2541,14 +2540,12 @@ arrayAvg([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** -- The average of function values (or the array average). - -Type: [Float64](../../sql-reference/data-types/float.md). +- The average of function values (or the array average). [Float64](../data-types/float.md). **Examples** @@ -2580,9 +2577,9 @@ Result: └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) +## arrayCumSum(\[func,\] arr1, ...) -Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. +Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. **Syntax** @@ -2592,13 +2589,11 @@ arrayCumSum(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- Returns an array of the partial sums of the elements in the source array. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of the partial sums of the elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). Example: @@ -2614,9 +2609,9 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res Note that the `arrayCumSum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayCumSumNonNegative(\[func,\] arr1, …) +## arrayCumSumNonNegative(\[func,\] arr1, ...) -Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. +Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. **Syntax** @@ -2626,13 +2621,11 @@ arrayCumSumNonNegative(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- Returns an array of non-negative partial sums of elements in the source array. 
- -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). ``` sql SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res @@ -2648,7 +2641,7 @@ Note that the `arraySumNonNegative` is a [higher-order function](../../sql-refer ## arrayProduct -Multiplies elements of an [array](../../sql-reference/data-types/array.md). +Multiplies elements of an [array](../data-types/array.md). **Syntax** @@ -2658,13 +2651,11 @@ arrayProduct(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- A product of array's elements. - -Type: [Float64](../../sql-reference/data-types/float.md). +- A product of array's elements. [Float64](../data-types/float.md). **Examples** @@ -2688,7 +2679,7 @@ Query: SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as res, toTypeName(res); ``` -Return value type is always [Float64](../../sql-reference/data-types/float.md). Result: +Return value type is always [Float64](../data-types/float.md). Result: ``` text ┌─res─┬─toTypeName(arrayProduct(array(toDecimal64(1, 8), toDecimal64(2, 8), toDecimal64(3, 8))))─┐ @@ -2698,7 +2689,7 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md). ## arrayRotateLeft -Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +Rotates an [array](../data-types/array.md) to the left by the specified number of elements. If the number of elements is negative, the array is rotated to the right. **Syntax** @@ -2709,14 +2700,12 @@ arrayRotateLeft(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2764,7 +2753,7 @@ Result: ## arrayRotateRight -Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Rotates an [array](../data-types/array.md) to the right by the specified number of elements. If the number of elements is negative, the array is rotated to the left. **Syntax** @@ -2775,14 +2764,12 @@ arrayRotateRight(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2830,7 +2817,7 @@ Result: ## arrayShiftLeft -Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. 
+Shifts an [array](../data-types/array.md) to the left by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the right. @@ -2842,15 +2829,13 @@ arrayShiftLeft(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. **Returned value** -- An array shifted to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2926,7 +2911,7 @@ Result: ## arrayShiftRight -Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Shifts an [array](../data-types/array.md) to the right by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the left. @@ -2938,15 +2923,13 @@ arrayShiftRight(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. **Returned value** -- An array shifted to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -3038,9 +3021,7 @@ arrayRandomSample(arr, samples) **Returned Value** -- An array containing a random sample of elements from the input array. - -Type: [Array](../data-types/array.md). +- An array containing a random sample of elements from the input array. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 0951c783aae..a48893b93bf 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -34,8 +34,8 @@ bitShiftLeft(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -81,8 +81,8 @@ bitShiftRight(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. 
[Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -131,13 +131,13 @@ bitSlice(s, offset[, length]) **Arguments** -- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s` — s is [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1. - `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right. **Returned value** -- The substring. [String](../../sql-reference/data-types/string.md) +- The substring. [String](../data-types/string.md) **Example** @@ -186,11 +186,9 @@ SELECT bitTest(number, index) - `number` – Integer number. - `index` – Position of bit. -**Returned values** +**Returned value** -Returns a value of bit at specified position. - -Type: `UInt8`. +- Value of the bit at the specified position. [UInt8](../data-types/int-uint.md). **Example** @@ -251,11 +249,9 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...) - `number` – Integer number. - `index1`, `index2`, `index3`, `index4` – Positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). -**Returned values** +**Returned value** -Returns result of logical conjuction. - -Type: `UInt8`. +- Result of the logical conjuction. [UInt8](../data-types/int-uint.md). **Example** @@ -316,11 +312,9 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...) - `number` – Integer number. - `index1`, `index2`, `index3`, `index4` – Positions of bit. -**Returned values** +**Returned value** -Returns result of logical disjunction. - -Type: `UInt8`. +- Result of the logical disjunction. [UInt8](../data-types/int-uint.md). **Example** @@ -368,15 +362,15 @@ bitCount(x) **Arguments** -- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. +- `x` — [Integer](../data-types/int-uint.md) or [floating-point](../data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. **Returned value** -- Number of bits set to one in the input number. +- Number of bits set to one in the input number. [UInt8](../data-types/int-uint.md). -The function does not convert input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. - -Type: `UInt8`. +:::note +The function does not convert the input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. +::: **Example** @@ -408,14 +402,12 @@ bitHammingDistance(int1, int2) **Arguments** -- `int1` — First integer value. 
[Int64](../../sql-reference/data-types/int-uint.md). -- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int1` — First integer value. [Int64](../data-types/int-uint.md). +- `int2` — Second integer value. [Int64](../data-types/int-uint.md). **Returned value** -- The Hamming distance. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- The Hamming distance. [UInt8](../data-types/int-uint.md). **Examples** diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 379be302881..a5c8a663b71 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -75,8 +75,8 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `range_end` – End of the range (exclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `range_end` – End of the range (exclusive). [UInt32](../data-types/int-uint.md). **Example** @@ -105,8 +105,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – Maximum cardinality of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -134,9 +134,9 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** -- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). +- `offset` – The position of the first element of the subset. [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -163,14 +163,12 @@ bitmapContains(bitmap, needle) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `needle` – Searched bit value. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `needle` – Searched bit value. [UInt32](../data-types/int-uint.md). **Returned values** -- 0 — If `bitmap` does not contain `needle`. -- 1 — If `bitmap` contains `needle`. - -Type: `UInt8`. +- 0 — If `bitmap` does not contain `needle`. [UInt8](../data-types/int-uint.md). +- 1 — If `bitmap` contains `needle`. [UInt8](../data-types/int-uint.md). 
**Example** diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index eb4e98961f1..564186fd8db 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -234,3 +234,34 @@ SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) :::note The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison. ::: + +## clamp + +Constrain the return value between A and B. + +**Syntax** + +``` sql +clamp(value, min, max) +``` + +**Arguments** + +- `value` – Input value. +- `min` – Limit the lower bound. +- `max` – Limit the upper bound. + +**Returned values** + +If the value is less than the minimum value, return the minimum value; if it is greater than the maximum value, return the maximum value; otherwise, return the current value. + +Examples: + +```sql +SELECT clamp(1, 2, 3) result, toTypeName(result) type; +``` +```response +┌─result─┬─type────┐ +│ 2 │ Float64 │ +└────────┴─────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index ed98f986139..4092c83954a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -26,7 +26,7 @@ SELECT ## makeDate -Creates a [Date](../../sql-reference/data-types/date.md) +Creates a [Date](../data-types/date.md) - from a year, month and day argument, or - from a year and day of year argument. @@ -43,16 +43,14 @@ Alias: **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day_of_year` — Day of the year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- A date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- A date created from the arguments. [Date](../data-types/date.md). **Example** @@ -85,11 +83,11 @@ Result: ``` ## makeDate32 -Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). +Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md). ## makeDateTime -Creates a [DateTime](../../sql-reference/data-types/datetime.md) from a year, month, day, hour, minute and second argument. 
+Creates a [DateTime](../data-types/datetime.md) from a year, month, day, hour, minute and second argument. **Syntax** @@ -99,19 +97,17 @@ makeDateTime(year, month, day, hour, minute, second[, timezone]) **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `hour` — Hour. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `minute` — Minute. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `second` — Second. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** -- A date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- A date with time created from the arguments. [DateTime](../data-types/datetime.md). **Example** @@ -129,7 +125,7 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). **Syntax** @@ -139,7 +135,7 @@ makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, t ## timestamp -Converts the first argument 'expr' to type [DateTime64(6)](../../sql-reference/data-types/datetime64.md). +Converts the first argument 'expr' to type [DateTime64(6)](../data-types/datetime64.md). If a second argument 'expr_time' is provided, it adds the specified time to the converted value. **Syntax** @@ -152,8 +148,8 @@ Alias: `TIMESTAMP` **Arguments** -- `expr` - Date or date with time. Type: [String](../../sql-reference/data-types/string.md). -- `expr_time` - Optional parameter. Time to add. [String](../../sql-reference/data-types/string.md). +- `expr` - Date or date with time. [String](../data-types/string.md). +- `expr_time` - Optional parameter. Time to add. 
[String](../data-types/string.md). **Examples** @@ -183,7 +179,7 @@ Result: **Returned value** -- [DateTime64](../../sql-reference/data-types/datetime64.md)(6) +- [DateTime64](../data-types/datetime64.md)(6) ## timeZone @@ -200,9 +196,7 @@ Alias: `timezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -237,9 +231,7 @@ Alias: `serverTimezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -273,14 +265,12 @@ Alias: `toTimezone`. **Arguments** -- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). +- `value` — Time or date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone for the returned value. [String](../data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). **Returned value** -- Date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Date and time. [DateTime](../data-types/datetime.md). **Example** @@ -320,7 +310,7 @@ int32samoa: 1546300800 ## timeZoneOf -Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. +Returns the timezone name of [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) data types. **Syntax** @@ -332,13 +322,11 @@ Alias: `timezoneOf`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Timezone name. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone name. [String](../data-types/string.md). **Example** @@ -369,13 +357,11 @@ Alias: `timezoneOffset`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Offset from UTC in seconds. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Offset from UTC in seconds. [Int32](../data-types/int-uint.md). **Example** @@ -410,9 +396,7 @@ Alias: `YEAR` **Returned value** -- The year of the given date/time - -Type: `UInt16` +- The year of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -446,9 +430,7 @@ Alias: `QUARTER` **Returned value** -- The quarter of the year (1, 2, 3 or 4) of the given date/time - -Type: `UInt8` +- The quarter of the year (1, 2, 3 or 4) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -482,9 +464,7 @@ Alias: `MONTH` **Returned value** -- The month of the year (1 - 12) of the given date/time - -Type: `UInt8` +- The month of the year (1 - 12) of the given date/time. [UInt8](../data-types/int-uint.md). 
**Example** @@ -518,9 +498,7 @@ Alias: `DAYOFYEAR` **Returned value** -- The day of the year (1 - 366) of the given date/time - -Type: `UInt16` +- The day of the year (1 - 366) of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -554,9 +532,7 @@ Aliases: `DAYOFMONTH`, `DAY` **Returned value** -- The day of the month (1 - 31) of the given date/time - -Type: `UInt8` +- The day of the month (1 - 31) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -643,9 +619,7 @@ Alias: `HOUR` **Returned value** -- The hour of the day (0 - 23) of the given date/time - -Type: `UInt8` +- The hour of the day (0 - 23) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -679,9 +653,7 @@ Alias: `MINUTE` **Returned value** -- The minute of the hour (0 - 59) of the given date/time - -Type: `UInt8` +- The minute of the hour (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -715,9 +687,7 @@ Alias: `SECOND` **Returned value** -- The second in the minute (0 - 59) of the given date/time - -Type: `UInt8` +- The second in the minute (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -763,9 +733,7 @@ Result: **Returned value** -- The millisecond in the minute (0 - 59) of the given date/time - -Type: `UInt16` +- The millisecond in the minute (0 - 59) of the given date/time. [UInt16](../data-types/int-uint.md). ## toUnixTimestamp @@ -782,9 +750,7 @@ toUnixTimestamp(str, [timezone]) **Returned value** -- Returns the unix timestamp. - -Type: `UInt32`. +- Returns the unix timestamp. [UInt32](../data-types/int-uint.md). **Example** @@ -842,9 +808,7 @@ toStartOfYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). **Example** @@ -876,9 +840,7 @@ toStartOfISOYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). **Example** @@ -911,9 +873,7 @@ toStartOfQuarter(value) **Returned value** -- The first day of the quarter of the given date/time - -Type: `Date` +- The first day of the quarter of the given date/time. [Date](../data-types/date.md). **Example** @@ -945,9 +905,7 @@ toStartOfMonth(value) **Returned value** -- The first day of the month of the given date/time - -Type: `Date` +- The first day of the month of the given date/time. [Date](../data-types/date.md). **Example** @@ -985,9 +943,7 @@ Alias: `LAST_DAY` **Returned value** -- The last day of the month of the given date/time - -Type: `Date` +- The last day of the month of the given date/time=. [Date](../data-types/date.md). **Example** @@ -1019,9 +975,7 @@ toMonday(value) **Returned value** -- The date of the nearest Monday on or prior to the given date - -Type: `Date` +- The date of the nearest Monday on or prior to the given date. [Date](../data-types/date.md). **Example** @@ -1057,9 +1011,7 @@ toStartOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode. [Date](../data-types/date.md). 
**Example** @@ -1102,9 +1054,7 @@ toLastDayOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or after the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or after the given date, depending on the mode. [Date](../data-types/date.md). **Example** @@ -1144,9 +1094,7 @@ toStartOfDay(value) **Returned value** -- The start of the day of the given date/time - -Type: `DateTime` +- The start of the day of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1178,9 +1126,7 @@ toStartOfHour(value) **Returned value** -- The start of the hour of the given date/time - -Type: `DateTime` +- The start of the hour of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1214,9 +1160,7 @@ toStartOfMinute(value) **Returned value** -- The start of the minute of the given date/time - -Type: `DateTime` +- The start of the minute of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1248,14 +1192,12 @@ toStartOfSecond(value, [timezone]) **Arguments** -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). **Returned value** -- Input value without sub-seconds. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Input value without sub-seconds. [DateTime64](../data-types/datetime64.md). **Examples** @@ -1293,6 +1235,168 @@ Result: - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter. +## toStartOfMillisecond + +Rounds down a date with time to the start of the milliseconds. + +**Syntax** + +``` sql +toStartOfMillisecond(value, [timezone]) +``` + +**Arguments** + +- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Input value with sub-milliseconds. [DateTime64](../../sql-reference/data-types/datetime64.md). 
+
+**Examples**
+
+Query without timezone:
+
+``` sql
+WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
+SELECT toStartOfMillisecond(dt64);
+```
+
+Result:
+
+``` text
+┌────toStartOfMillisecond(dt64)─┐
+│ 2020-01-01 10:20:30.999000000 │
+└───────────────────────────────┘
+```
+
+Query with timezone:
+
+``` sql
+WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
+SELECT toStartOfMillisecond(dt64, 'Asia/Istanbul');
+```
+
+Result:
+
+``` text
+┌─toStartOfMillisecond(dt64, 'Asia/Istanbul')─┐
+│ 2020-01-01 12:20:30.999000000 │
+└─────────────────────────────────────────────┘
+```
+
+## toStartOfMicrosecond
+
+Rounds down a date with time to the start of the microseconds.
+
+**Syntax**
+
+``` sql
+toStartOfMicrosecond(value, [timezone])
+```
+
+**Arguments**
+
+- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Input value with sub-microseconds. [DateTime64](../../sql-reference/data-types/datetime64.md).
+
+**Examples**
+
+Query without timezone:
+
+``` sql
+WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
+SELECT toStartOfMicrosecond(dt64);
+```
+
+Result:
+
+``` text
+┌────toStartOfMicrosecond(dt64)─┐
+│ 2020-01-01 10:20:30.999999000 │
+└───────────────────────────────┘
+```
+
+Query with timezone:
+
+``` sql
+WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
+SELECT toStartOfMicrosecond(dt64, 'Asia/Istanbul');
+```
+
+Result:
+
+``` text
+┌─toStartOfMicrosecond(dt64, 'Asia/Istanbul')─┐
+│ 2020-01-01 12:20:30.999999000 │
+└─────────────────────────────────────────────┘
+```
+
+**See also**
+
+- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
+
+## toStartOfNanosecond
+
+Rounds down a date with time to the start of the nanoseconds.
+
+**Syntax**
+
+``` sql
+toStartOfNanosecond(value, [timezone])
+```
+
+**Arguments**
+
+- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Input value with nanoseconds. [DateTime64](../../sql-reference/data-types/datetime64.md).
+
+**Examples**
+
+Query without timezone:
+
+``` sql
+WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
+SELECT toStartOfNanosecond(dt64);
+```
+
+Result:
+
+``` text
+┌─────toStartOfNanosecond(dt64)─┐
+│ 2020-01-01 10:20:30.999999999 │
+└───────────────────────────────┘
+```
+
+Query with timezone:
+
+``` sql
+WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
+SELECT toStartOfNanosecond(dt64, 'Asia/Istanbul');
+```
+
+Result:
+
+``` text
+┌─toStartOfNanosecond(dt64, 'Asia/Istanbul')─┐
+│ 2020-01-01 12:20:30.999999999 │
+└────────────────────────────────────────────┘
+```
+
+**See also**
+
+- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
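+
+For illustration, the three sub-second functions can also be applied together to the same `DateTime64(9)` value used in the examples above (the column aliases are arbitrary, and the expected values simply restate the per-function results shown earlier):
+
+``` sql
+WITH toDateTime64('2020-01-01 10:20:30.999999999', 9) AS dt64
+SELECT
+    toStartOfMillisecond(dt64) AS to_millisecond,
+    toStartOfMicrosecond(dt64) AS to_microsecond,
+    toStartOfNanosecond(dt64) AS to_nanosecond
+FORMAT Vertical;
+```
+
+Result:
+
+``` text
+Row 1:
+──────
+to_millisecond: 2020-01-01 10:20:30.999000000
+to_microsecond: 2020-01-01 10:20:30.999999000
+to_nanosecond:  2020-01-01 10:20:30.999999999
+```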
+ ## toStartOfFiveMinutes Rounds down a date with time to the start of the five-minute interval. @@ -1309,9 +1413,7 @@ toStartOfFiveMinutes(value) **Returned value** -- The start of the five-minute interval of the given date/time - -Type: `DateTime` +- The start of the five-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1349,9 +1451,7 @@ toStartOfTenMinutes(value) **Returned value** -- The start of the ten-minute interval of the given date/time - -Type: `DateTime` +- The start of the ten-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1389,9 +1489,7 @@ toStartOfFifteenMinutes(value) **Returned value** -- The start of the fifteen-minute interval of the given date/time - -Type: `DateTime` +- The start of the fifteen-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1417,31 +1515,31 @@ toStartOfFifteenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:15:00 This function generalizes other `toStartOf*()` functions with `toStartOfInterval(date_or_date_with_time, INTERVAL x unit [, time_zone])` syntax. For example, -- `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`, -- `toStartOfInterval(t, INTERVAL 1 month)` returns the same as `toStartOfMonth(t)`, -- `toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`, -- `toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)`. +- `toStartOfInterval(t, INTERVAL 1 YEAR)` returns the same as `toStartOfYear(t)`, +- `toStartOfInterval(t, INTERVAL 1 MONTH)` returns the same as `toStartOfMonth(t)`, +- `toStartOfInterval(t, INTERVAL 1 DAY)` returns the same as `toStartOfDay(t)`, +- `toStartOfInterval(t, INTERVAL 15 MINUTE)` returns the same as `toStartOfFifteenMinutes(t)`. The calculation is performed relative to specific points in time: | Interval | Start | |-------------|------------------------| -| year | year 0 | -| quarter | 1900 Q1 | -| month | 1900 January | -| week | 1970, 1st week (01-05) | -| day | 1970-01-01 | -| hour | (*) | -| minute | 1970-01-01 00:00:00 | -| second | 1970-01-01 00:00:00 | -| millisecond | 1970-01-01 00:00:00 | -| microsecond | 1970-01-01 00:00:00 | -| nanosecond | 1970-01-01 00:00:00 | +| YEAR | year 0 | +| QUARTER | 1900 Q1 | +| MONTH | 1900 January | +| WEEK | 1970, 1st week (01-05) | +| DAY | 1970-01-01 | +| HOUR | (*) | +| MINUTE | 1970-01-01 00:00:00 | +| SECOND | 1970-01-01 00:00:00 | +| MILLISECOND | 1970-01-01 00:00:00 | +| MICROSECOND | 1970-01-01 00:00:00 | +| NANOSECOND | 1970-01-01 00:00:00 | (*) hour intervals are special: the calculation is always performed relative to 00:00:00 (midnight) of the current day. As a result, only hour values between 1 and 23 are useful. -If unit `week` was specified, `toStartOfInterval` assumes that weeks start on Monday. Note that this behavior is different from that of function `toStartOfWeek` in which weeks start by default on Sunday. +If unit `WEEK` was specified, `toStartOfInterval` assumes that weeks start on Monday. Note that this behavior is different from that of function `toStartOfWeek` in which weeks start by default on Sunday. **See Also** @@ -1499,7 +1597,7 @@ This function returns the week number for date or datetime. The two-argument for The following table describes how the mode argument works. -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... 
| |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | @@ -1598,14 +1696,12 @@ Alias: `TO_DAYS` **Arguments** -- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `date` — The date to calculate the number of days passed since year zero from. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../data-types/string.md) **Returned value** -The number of days passed since date 0000-01-01. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +The number of days passed since date 0000-01-01. [UInt32](../data-types/int-uint.md). **Example** @@ -1629,7 +1725,7 @@ Result: Returns for a given number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) the corresponding date in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`FROM_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_from-days) function. -The result is undefined if it cannot be represented within the bounds of the [Date](../../sql-reference/data-types/date.md) type. +The result is undefined if it cannot be represented within the bounds of the [Date](../data-types/date.md) type. **Syntax** @@ -1645,9 +1741,7 @@ Alias: `FROM_DAYS` **Returned value** -The date corresponding to the number of days passed since year zero. - -Type: [Date](../../sql-reference/data-types/date.md). +The date corresponding to the number of days passed since year zero. [Date](../data-types/date.md). **Example** @@ -1669,7 +1763,7 @@ Result: ## fromDaysSinceYearZero32 -Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../../sql-reference/data-types/date32.md). +Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md). ## age @@ -1686,7 +1780,7 @@ age('unit', startdate, enddate, [timezone]) **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../data-types/string.md). Possible values: - `nanosecond`, `nanoseconds`, `ns` @@ -1701,17 +1795,15 @@ age('unit', startdate, enddate, [timezone]) - `quarter`, `quarters`, `qq`, `q` - `year`, `years`, `yyyy`, `yy` -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). 
-- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md). **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md). **Example** @@ -1764,7 +1856,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../data-types/string.md). Possible values: - `nanosecond`, `nanoseconds`, `ns` @@ -1779,17 +1871,15 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `quarter`, `quarters`, `qq`, `q` - `year`, `years`, `yyyy`, `yy` -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). 
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md). **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md). **Example** @@ -1853,14 +1943,12 @@ Alias: `dateTrunc`. `unit` argument is case-insensitive. -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). **Returned value** -- Value, truncated to the specified part of date. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Value, truncated to the specified part of date. [DateTime](../data-types/datetime.md). **Example** @@ -1918,7 +2006,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to add. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: - `second` @@ -1930,14 +2018,12 @@ Aliases: `dateAdd`, `DATE_ADD`. - `quarter` - `year` -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `date` — The date or date with time to which `value` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -1994,7 +2080,7 @@ Aliases: `dateSub`, `DATE_SUB`. 
**Arguments** -- `unit` — The type of interval to subtract. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to subtract. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: @@ -2007,14 +2093,12 @@ Aliases: `dateSub`, `DATE_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — The date or date with time from which `value` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2063,9 +2147,9 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `unit` — The type of interval to add. [String](../data-types/string.md). Possible values: - `second` @@ -2079,9 +2163,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Returned value** -Date or date with time with the specified `value` expressed in `unit` added to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time with the specified `value` expressed in `unit` added to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2113,7 +2195,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Arguments** -- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to subtract. [String](../data-types/string.md). Possible values: - `second` @@ -2125,14 +2207,12 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. 
- `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2162,14 +2242,12 @@ addDate(date, interval) **Arguments** -- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md) -- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md). +- `date` — The date or date with time to which `interval` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md) +- `interval` — Interval to add. [Interval](../data-types/special-data-types/interval.md). **Returned value** -Date or date with time obtained by adding `interval` to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `interval` to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2205,14 +2283,12 @@ subDate(date, interval) **Arguments** -- `date` — The date or date with time from which `interval` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md) -- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md). +- `date` — The date or date with time from which `interval` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md) +- `interval` — Interval to subtract. [Interval](../data-types/special-data-types/interval.md). 
**Returned value** -Date or date with time obtained by subtracting `interval` from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `interval` from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2248,13 +2324,11 @@ now([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time. [DateTime](../data-types/datetime.md). **Example** @@ -2299,13 +2373,11 @@ now64([scale], [timezone]) **Arguments** - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically, are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time with sub-second precision. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Current date and time with sub-second precision. [DateTime64](../data-types/datetime64.md). **Example** @@ -2335,13 +2407,11 @@ nowInBlock([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time at the moment of processing of each block of data. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time at the moment of processing of each block of data. [DateTime](../data-types/datetime.md). **Example** @@ -2381,9 +2451,7 @@ today() **Returned value** -- Current date - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date. [DateTime](../data-types/datetime.md). **Example** @@ -2473,7 +2541,7 @@ Result: ## YYYYMMDDToDate -Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md). +Converts a number containing the year, month and day number to a [Date](../data-types/date.md). This function is the opposite of function `toYYYYMMDD()`. @@ -2487,13 +2555,11 @@ YYYYMMDDToDate(yyyymmdd); **Arguments** -- `yyyymmdd` - A number representing the year, month and day. 
[Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `yyyymmdd` - A number representing the year, month and day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- a date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- a date created from the arguments. [Date](../data-types/date.md). **Example** @@ -2511,11 +2577,11 @@ Result: ## YYYYMMDDToDate32 -Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md). +Like function `YYYYMMDDToDate()` but produces a [Date32](../data-types/date32.md). ## YYYYMMDDhhmmssToDateTime -Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../../sql-reference/data-types/datetime.md). +Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../data-types/datetime.md). The output is undefined if the input does not encode a valid DateTime value. @@ -2529,14 +2595,12 @@ YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]); **Arguments** -- `yyyymmddhhmmss` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `yyyymmddhhmmss` - A number representing the year, month and day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). - `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** -- a date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- a date with time created from the arguments. [DateTime](../data-types/datetime.md). **Example** @@ -2554,17 +2618,31 @@ Result: ## YYYYMMDDhhmmssToDateTime64 -Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../data-types/datetime64.md). Accepts an additional, optional `precision` parameter after the `timezone` parameter. -## addYears, addQuarters, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addMilliseconds, addMicroseconds, addNanoseconds +## addYears -These functions add units of the interval specified by the function name to a date, a date with time or a string-encoded date / date with time. A date or date with time is returned. +Adds a specified number of years to a date, a date with time or a string-encoded date / date with time. -Example: +**Syntax** -``` sql +```sql +addYears(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of years to. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of years to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). 
+ +**Example** + +```sql WITH toDate('2024-01-01') AS date, toDateTime('2024-01-01 00:00:00') AS date_time, @@ -2575,19 +2653,477 @@ SELECT addYears(date_time_string, 1) AS add_years_with_date_time_string ``` -``` text +```response ┌─add_years_with_date─┬─add_years_with_date_time─┬─add_years_with_date_time_string─┐ │ 2025-01-01 │ 2025-01-01 00:00:00 │ 2025-01-01 00:00:00.000 │ └─────────────────────┴──────────────────────────┴─────────────────────────────────┘ ``` -## subtractYears, subtractQuarters, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractMilliseconds, subtractMicroseconds, subtractNanoseconds +## addQuarters -These functions subtract units of the interval specified by the function name from a date, a date with time or a string-encoded date / date with time. A date or date with time is returned. +Adds a specified number of quarters to a date, a date with time or a string-encoded date / date with time. -Example: +**Syntax** -``` sql +```sql +addQuarters(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of quarters to. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of quarters to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addQuarters(date, 1) AS add_quarters_with_date, + addQuarters(date_time, 1) AS add_quarters_with_date_time, + addQuarters(date_time_string, 1) AS add_quarters_with_date_time_string +``` + +```response +┌─add_quarters_with_date─┬─add_quarters_with_date_time─┬─add_quarters_with_date_time_string─┐ +│ 2024-04-01 │ 2024-04-01 00:00:00 │ 2024-04-01 00:00:00.000 │ +└────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ +``` + +## addMonths + +Adds a specified number of months to a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +addMonths(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of months to. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of months to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). 
+ +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addMonths(date, 6) AS add_months_with_date, + addMonths(date_time, 6) AS add_months_with_date_time, + addMonths(date_time_string, 6) AS add_months_with_date_time_string +``` + +```response +┌─add_months_with_date─┬─add_months_with_date_time─┬─add_months_with_date_time_string─┐ +│ 2024-07-01 │ 2024-07-01 00:00:00 │ 2024-07-01 00:00:00.000 │ +└──────────────────────┴───────────────────────────┴──────────────────────────────────┘ +``` + +## addWeeks + +Adds a specified number of weeks to a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +addWeeks(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of weeks to. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of weeks to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addWeeks(date, 5) AS add_weeks_with_date, + addWeeks(date_time, 5) AS add_weeks_with_date_time, + addWeeks(date_time_string, 5) AS add_weeks_with_date_time_string +``` + +```response +┌─add_weeks_with_date─┬─add_weeks_with_date_time─┬─add_weeks_with_date_time_string─┐ +│ 2024-02-05 │ 2024-02-05 00:00:00 │ 2024-02-05 00:00:00.000 │ +└─────────────────────┴──────────────────────────┴─────────────────────────────────┘ +``` + +## addDays + +Adds a specified number of days to a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +addDays(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of days to. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of days to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addDays(date, 5) AS add_days_with_date, + addDays(date_time, 5) AS add_days_with_date_time, + addDays(date_time_string, 5) AS add_days_with_date_time_string +``` + +```response +┌─add_days_with_date─┬─add_days_with_date_time─┬─add_days_with_date_time_string─┐ +│ 2024-01-06 │ 2024-01-06 00:00:00 │ 2024-01-06 00:00:00.000 │ +└────────────────────┴─────────────────────────┴────────────────────────────────┘ +``` + +## addHours + +Adds a specified number of hours to a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +addHours(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of hours to. 
[Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of hours to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addHours(date, 12) AS add_hours_with_date, + addHours(date_time, 12) AS add_hours_with_date_time, + addHours(date_time_string, 12) AS add_hours_with_date_time_string +``` + +```response +┌─add_hours_with_date─┬─add_hours_with_date_time─┬─add_hours_with_date_time_string─┐ +│ 2024-01-01 12:00:00 │ 2024-01-01 12:00:00 │ 2024-01-01 12:00:00.000 │ +└─────────────────────┴──────────────────────────┴─────────────────────────────────┘ +``` + +## addMinutes + +Adds a specified number of minutes to a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +addMinutes(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of minutes to. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of minutes to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addMinutes(date, 20) AS add_minutes_with_date, + addMinutes(date_time, 20) AS add_minutes_with_date_time, + addMinutes(date_time_string, 20) AS add_minutes_with_date_time_string +``` + +```response +┌─add_minutes_with_date─┬─add_minutes_with_date_time─┬─add_minutes_with_date_time_string─┐ +│ 2024-01-01 00:20:00 │ 2024-01-01 00:20:00 │ 2024-01-01 00:20:00.000 │ +└───────────────────────┴────────────────────────────┴───────────────────────────────────┘ +``` + +## addSeconds + +Adds a specified number of seconds to a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +addSeconds(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to add specified number of seconds to. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of seconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` plus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). 
+ +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addSeconds(date, 30) AS add_seconds_with_date, + addSeconds(date_time, 30) AS add_seconds_with_date_time, + addSeconds(date_time_string, 30) AS add_seconds_with_date_time_string +``` + +```response +┌─add_seconds_with_date─┬─add_seconds_with_date_time─┬─add_seconds_with_date_time_string─┐ +│ 2024-01-01 00:00:30 │ 2024-01-01 00:00:30 │ 2024-01-01 00:00:30.000 │ +└───────────────────────┴────────────────────────────┴───────────────────────────────────┘ +``` + +## addMilliseconds + +Adds a specified number of milliseconds to a date with time or a string-encoded date with time. + +**Syntax** + +```sql +addMilliseconds(date_time, num) +``` + +**Parameters** + +- `date_time`: Date with time to add specified number of milliseconds to. [DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of milliseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date_time` plus `num` milliseconds. [DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addMilliseconds(date_time, 1000) AS add_milliseconds_with_date_time, + addMilliseconds(date_time_string, 1000) AS add_milliseconds_with_date_time_string +``` + +```response +┌─add_milliseconds_with_date_time─┬─add_milliseconds_with_date_time_string─┐ +│ 2024-01-01 00:00:01.000 │ 2024-01-01 00:00:01.000 │ +└─────────────────────────────────┴────────────────────────────────────────┘ +``` + +## addMicroseconds + +Adds a specified number of microseconds to a date with time or a string-encoded date with time. + +**Syntax** + +```sql +addMicroseconds(date_time, num) +``` + +**Parameters** + +- `date_time`: Date with time to add specified number of microseconds to. [DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of microseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date_time` plus `num` microseconds. [DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addMicroseconds(date_time, 1000000) AS add_microseconds_with_date_time, + addMicroseconds(date_time_string, 1000000) AS add_microseconds_with_date_time_string +``` + +```response +┌─add_microseconds_with_date_time─┬─add_microseconds_with_date_time_string─┐ +│ 2024-01-01 00:00:01.000000 │ 2024-01-01 00:00:01.000000 │ +└─────────────────────────────────┴────────────────────────────────────────┘ +``` + +## addNanoseconds + +Adds a specified number of nanoseconds to a date with time or a string-encoded date with time. + +**Syntax** + +```sql +addNanoseconds(date_time, num) +``` + +**Parameters** + +- `date_time`: Date with time to add specified number of nanoseconds to. [DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of nanoseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date_time` plus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). 
+ +**Example** + +```sql +WITH + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + addNanoseconds(date_time, 1000) AS add_nanoseconds_with_date_time, + addNanoseconds(date_time_string, 1000) AS add_nanoseconds_with_date_time_string +``` + +```response +┌─add_nanoseconds_with_date_time─┬─add_nanoseconds_with_date_time_string─┐ +│ 2024-01-01 00:00:00.000001000 │ 2024-01-01 00:00:00.000001000 │ +└────────────────────────────────┴───────────────────────────────────────┘ +``` + +## addInterval + +Adds an interval to another interval or tuple of intervals. + +**Syntax** + +```sql +addInterval(interval_1, interval_2) +``` + +**Parameters** + +- `interval_1`: First interval or tuple of intervals. [interval](../data-types/special-data-types/interval.md), [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). +- `interval_2`: Second interval to be added. [interval](../data-types/special-data-types/interval.md). + +**Returned value** +- Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). + +:::note +Intervals of the same type will be combined into a single interval. For instance if `toIntervalDay(1)` and `toIntervalDay(2)` are passed then the result will be `(3)` rather than `(1,1)`. +::: + +**Example** + +Query: + +```sql +SELECT addInterval(INTERVAL 1 DAY, INTERVAL 1 MONTH); +SELECT addInterval((INTERVAL 1 DAY, INTERVAL 1 YEAR), INTERVAL 1 MONTH); +SELECT addInterval(INTERVAL 2 DAY, INTERVAL 1 DAY); +``` + +Result: + +```response +┌─addInterval(toIntervalDay(1), toIntervalMonth(1))─┐ +│ (1,1) │ +└───────────────────────────────────────────────────┘ +┌─addInterval((toIntervalDay(1), toIntervalYear(1)), toIntervalMonth(1))─┐ +│ (1,1,1) │ +└────────────────────────────────────────────────────────────────────────┘ +┌─addInterval(toIntervalDay(2), toIntervalDay(1))─┐ +│ (3) │ +└─────────────────────────────────────────────────┘ +``` + +## addTupleOfIntervals + +Consecutively adds a tuple of intervals to a Date or a DateTime. + +**Syntax** + +```sql +addTupleOfIntervals(date, intervals) +``` + +**Parameters** + +- `date`: Date or date with time to add `intervals` to. [date](../data-types/date.md)/[date32](../data-types/date32.md)/[datetime](../data-types/datetime.md)/[datetime64](../data-types/datetime64.md). +- `intervals`: Tuple of intervals to add to `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). + +**Returned value** +- Returns `date` with added `intervals`. [date](../data-types/date.md)/[date32](../data-types/date32.md)/[datetime](../data-types/datetime.md)/[datetime64](../data-types/datetime64.md). + +**Example** + +Query: + +```sql +WITH toDate('2018-01-01') AS date +SELECT addTupleOfIntervals(date, (INTERVAL 1 DAY, INTERVAL 1 MONTH, INTERVAL 1 YEAR)) +``` + +Result: + +```response +┌─addTupleOfIntervals(date, (toIntervalDay(1), toIntervalMonth(1), toIntervalYear(1)))─┐ +│ 2019-02-02 │ +└──────────────────────────────────────────────────────────────────────────────────────┘ +``` +## subtractYears + +Subtracts a specified number of years from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractYears(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of years from. 
[Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of years to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql WITH toDate('2024-01-01') AS date, toDateTime('2024-01-01 00:00:00') AS date_time, @@ -2598,12 +3134,456 @@ SELECT subtractYears(date_time_string, 1) AS subtract_years_with_date_time_string ``` -``` text +```response ┌─subtract_years_with_date─┬─subtract_years_with_date_time─┬─subtract_years_with_date_time_string─┐ │ 2023-01-01 │ 2023-01-01 00:00:00 │ 2023-01-01 00:00:00.000 │ └──────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ ``` +## subtractQuarters + +Subtracts a specified number of quarters from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractQuarters(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of quarters from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of quarters to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractQuarters(date, 1) AS subtract_quarters_with_date, + subtractQuarters(date_time, 1) AS subtract_quarters_with_date_time, + subtractQuarters(date_time_string, 1) AS subtract_quarters_with_date_time_string +``` + +```response +┌─subtract_quarters_with_date─┬─subtract_quarters_with_date_time─┬─subtract_quarters_with_date_time_string─┐ +│ 2023-10-01 │ 2023-10-01 00:00:00 │ 2023-10-01 00:00:00.000 │ +└─────────────────────────────┴──────────────────────────────────┴─────────────────────────────────────────┘ +``` + +## subtractMonths + +Subtracts a specified number of months from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractMonths(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of months from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of months to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). 
+ +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractMonths(date, 1) AS subtract_months_with_date, + subtractMonths(date_time, 1) AS subtract_months_with_date_time, + subtractMonths(date_time_string, 1) AS subtract_months_with_date_time_string +``` + +```response +┌─subtract_months_with_date─┬─subtract_months_with_date_time─┬─subtract_months_with_date_time_string─┐ +│ 2023-12-01 │ 2023-12-01 00:00:00 │ 2023-12-01 00:00:00.000 │ +└───────────────────────────┴────────────────────────────────┴───────────────────────────────────────┘ +``` + +## subtractWeeks + +Subtracts a specified number of weeks from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractWeeks(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of weeks from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of weeks to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractWeeks(date, 1) AS subtract_weeks_with_date, + subtractWeeks(date_time, 1) AS subtract_weeks_with_date_time, + subtractWeeks(date_time_string, 1) AS subtract_weeks_with_date_time_string +``` + +```response + ┌─subtract_weeks_with_date─┬─subtract_weeks_with_date_time─┬─subtract_weeks_with_date_time_string─┐ + │ 2023-12-25 │ 2023-12-25 00:00:00 │ 2023-12-25 00:00:00.000 │ + └──────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ +``` + +## subtractDays + +Subtracts a specified number of days from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractDays(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of days from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of days to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). 
+ +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractDays(date, 31) AS subtract_days_with_date, + subtractDays(date_time, 31) AS subtract_days_with_date_time, + subtractDays(date_time_string, 31) AS subtract_days_with_date_time_string +``` + +```response +┌─subtract_days_with_date─┬─subtract_days_with_date_time─┬─subtract_days_with_date_time_string─┐ +│ 2023-12-01 │ 2023-12-01 00:00:00 │ 2023-12-01 00:00:00.000 │ +└─────────────────────────┴──────────────────────────────┴─────────────────────────────────────┘ +``` + +## subtractHours + +Subtracts a specified number of hours from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractHours(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of hours from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[Datetime](../data-types/datetime.md)/[Datetime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of hours to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[Datetime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractHours(date, 12) AS subtract_hours_with_date, + subtractHours(date_time, 12) AS subtract_hours_with_date_time, + subtractHours(date_time_string, 12) AS subtract_hours_with_date_time_string +``` + +```response +┌─subtract_hours_with_date─┬─subtract_hours_with_date_time─┬─subtract_hours_with_date_time_string─┐ +│ 2023-12-31 12:00:00 │ 2023-12-31 12:00:00 │ 2023-12-31 12:00:00.000 │ +└──────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ +``` + +## subtractMinutes + +Subtracts a specified number of minutes from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractMinutes(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of minutes from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of minutes to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). 
+ +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractMinutes(date, 30) AS subtract_minutes_with_date, + subtractMinutes(date_time, 30) AS subtract_minutes_with_date_time, + subtractMinutes(date_time_string, 30) AS subtract_minutes_with_date_time_string +``` + +```response +┌─subtract_minutes_with_date─┬─subtract_minutes_with_date_time─┬─subtract_minutes_with_date_time_string─┐ +│ 2023-12-31 23:30:00 │ 2023-12-31 23:30:00 │ 2023-12-31 23:30:00.000 │ +└────────────────────────────┴─────────────────────────────────┴────────────────────────────────────────┘ +``` + +## subtractSeconds + +Subtracts a specified number of seconds from a date, a date with time or a string-encoded date / date with time. + +**Syntax** + +```sql +subtractSeconds(date, num) +``` + +**Parameters** + +- `date`: Date / date with time to subtract specified number of seconds from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of seconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date` minus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractSeconds(date, 60) AS subtract_seconds_with_date, + subtractSeconds(date_time, 60) AS subtract_seconds_with_date_time, + subtractSeconds(date_time_string, 60) AS subtract_seconds_with_date_time_string +``` + +```response +┌─subtract_seconds_with_date─┬─subtract_seconds_with_date_time─┬─subtract_seconds_with_date_time_string─┐ +│ 2023-12-31 23:59:00 │ 2023-12-31 23:59:00 │ 2023-12-31 23:59:00.000 │ +└────────────────────────────┴─────────────────────────────────┴────────────────────────────────────────┘ +``` + +## subtractMilliseconds + +Subtracts a specified number of milliseconds from a date with time or a string-encoded date with time. + +**Syntax** + +```sql +subtractMilliseconds(date_time, num) +``` + +**Parameters** + +- `date_time`: Date with time to subtract specified number of milliseconds from. [DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of milliseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date_time` minus `num` milliseconds. [DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractMilliseconds(date_time, 1000) AS subtract_milliseconds_with_date_time, + subtractMilliseconds(date_time_string, 1000) AS subtract_milliseconds_with_date_time_string +``` + +```response +┌─subtract_milliseconds_with_date_time─┬─subtract_milliseconds_with_date_time_string─┐ +│ 2023-12-31 23:59:59.000 │ 2023-12-31 23:59:59.000 │ +└──────────────────────────────────────┴─────────────────────────────────────────────┘ +``` + +## subtractMicroseconds + +Subtracts a specified number of microseconds from a date with time or a string-encoded date with time. 
+ +**Syntax** + +```sql +subtractMicroseconds(date_time, num) +``` + +**Parameters** + +- `date_time`: Date with time to subtract specified number of microseconds from. [DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of microseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date_time` minus `num` microseconds. [DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractMicroseconds(date_time, 1000000) AS subtract_microseconds_with_date_time, + subtractMicroseconds(date_time_string, 1000000) AS subtract_microseconds_with_date_time_string +``` + +```response +┌─subtract_microseconds_with_date_time─┬─subtract_microseconds_with_date_time_string─┐ +│ 2023-12-31 23:59:59.000000 │ 2023-12-31 23:59:59.000000 │ +└──────────────────────────────────────┴─────────────────────────────────────────────┘ +``` + +## subtractNanoseconds + +Subtracts a specified number of nanoseconds from a date with time or a string-encoded date with time. + +**Syntax** + +```sql +subtractNanoseconds(date_time, num) +``` + +**Parameters** + +- `date_time`: Date with time to subtract specified number of nanoseconds from. [DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md), [String](../data-types/string.md). +- `num`: Number of nanoseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). + +**Returned value** +- Returns `date_time` minus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). + +**Example** + +```sql +WITH + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string +SELECT + subtractNanoseconds(date_time, 1000) AS subtract_nanoseconds_with_date_time, + subtractNanoseconds(date_time_string, 1000) AS subtract_nanoseconds_with_date_time_string +``` + +```response +┌─subtract_nanoseconds_with_date_time─┬─subtract_nanoseconds_with_date_time_string─┐ +│ 2023-12-31 23:59:59.999999000 │ 2023-12-31 23:59:59.999999000 │ +└─────────────────────────────────────┴────────────────────────────────────────────┘ +``` + +## subtractInterval + +Adds a negated interval to another interval or tuple of intervals. + +**Syntax** + +```sql +subtractInterval(interval_1, interval_2) +``` + +**Parameters** + +- `interval_1`: First interval or tuple of intervals. [interval](../data-types/special-data-types/interval.md), [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). +- `interval_2`: Second interval to be negated. [interval](../data-types/special-data-types/interval.md). + +**Returned value** +- Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). + +:::note +Intervals of the same type will be combined into a single interval. 
For instance if `toIntervalDay(2)` and `toIntervalDay(1)` are passed then the result will be `(1)` rather than `(2,1)`. +::: + +**Example** + +Query: + +```sql +SELECT subtractInterval(INTERVAL 1 DAY, INTERVAL 1 MONTH); +SELECT subtractInterval((INTERVAL 1 DAY, INTERVAL 1 YEAR), INTERVAL 1 MONTH); +SELECT subtractInterval(INTERVAL 2 DAY, INTERVAL 1 DAY); +``` + +Result: + +```response +┌─subtractInterval(toIntervalDay(1), toIntervalMonth(1))─┐ +│ (1,-1) │ +└────────────────────────────────────────────────────────┘ +┌─subtractInterval((toIntervalDay(1), toIntervalYear(1)), toIntervalMonth(1))─┐ +│ (1,1,-1) │ +└─────────────────────────────────────────────────────────────────────────────┘ +┌─subtractInterval(toIntervalDay(2), toIntervalDay(1))─┐ +│ (1) │ +└──────────────────────────────────────────────────────┘ +``` + +## subtractTupleOfIntervals + +Consecutively subtracts a tuple of intervals from a Date or a DateTime. + +**Syntax** + +```sql +subtractTupleOfIntervals(date, intervals) +``` + +**Parameters** + +- `date`: Date or date with time to subtract `intervals` from. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). +- `intervals`: Tuple of intervals to subtract from `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). + +**Returned value** +- Returns `date` with subtracted `intervals`. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Example** + +Query: + +```sql +WITH toDate('2018-01-01') AS date SELECT subtractTupleOfIntervals(date, (INTERVAL 1 DAY, INTERVAL 1 YEAR)) +``` + +Result: + +```response +┌─subtractTupleOfIntervals(date, (toIntervalDay(1), toIntervalYear(1)))─┐ +│ 2016-12-31 │ +└───────────────────────────────────────────────────────────────────────┘ +``` + ## timeSlots(StartTime, Duration,\[, Size\]) For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter set to 1800 (30 minutes) by default. @@ -2635,7 +3615,7 @@ Formats a Time according to the given Format string. Format is a constant expres formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. -The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). +The opposite operation of this function is [parseDateTime](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). Alias: `DATE_FORMAT`. @@ -2761,7 +3741,7 @@ Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. -The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). +The opposite operation of this function is [parseDateTimeInJodaSyntax](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). **Replacement fields** @@ -2821,15 +3801,13 @@ dateName(date_part, date) **Arguments** -- `date_part` — Date part. 
Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../data-types/string.md). +- `date` — Date. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone. Optional. [String](../data-types/string.md). **Returned value** -- The specified part of date. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The specified part of date. [String](../data-types/string.md#string) **Example** @@ -2861,13 +3839,11 @@ monthName(date) **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- The name of the month. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The name of the month. [String](../data-types/string.md#string) **Example** @@ -2890,7 +3866,7 @@ This function converts a Unix timestamp to a calendar date and a time of a day. It can be called in two ways: -When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). +When given a single argument of type [Integer](../data-types/int-uint.md), it returns a value of type [DateTime](../data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). Alias: `FROM_UNIXTIME`. @@ -2908,7 +3884,7 @@ Result: └──────────────────────────────┘ ``` -When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. 
+When given two or three arguments where the first argument is a value of type [Integer](../data-types/int-uint.md), [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. **Example:** @@ -2958,13 +3934,11 @@ toModifiedJulianDay(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Int32](../data-types/int-uint.md). **Example** @@ -2992,13 +3966,11 @@ toModifiedJulianDayOrNull(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. - -Type: [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Nullable(Int32)](../data-types/int-uint.md). **Example** @@ -3026,13 +3998,11 @@ fromModifiedJulianDay(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. - -Type: [String](../../sql-reference/data-types/string.md) +- Date in text form. [String](../data-types/string.md) **Example** @@ -3060,13 +4030,11 @@ fromModifiedJulianDayOrNull(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. - -Type: [Nullable(String)](../../sql-reference/data-types/string.md) +- Date in text form. [Nullable(String)](../data-types/string.md) **Example** @@ -3094,8 +4062,8 @@ toUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../data-types/string.md) **Returned value** @@ -3127,8 +4095,8 @@ fromUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `time_zone` — A String type const value or an expression represent the time zone. 
[String types](../../sql-reference/data-types/string.md) +- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../data-types/string.md) **Returned value** @@ -3147,6 +4115,43 @@ Result: │ 2023-03-16 18:00:00.000 │ └─────────────────────────────────────────────────────────────────────────┘ ``` + +## UTCTimestamp + +Returns the current date and time at the moment of query analysis. The function is a constant expression. + +:::note +This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now-now) is the preferred usage. +::: + +**Syntax** + +```sql +UTCTimestamp() +``` + +Alias: `UTC_timestamp`. + +**Returned value** + +- Returns the current date and time at the moment of query analysis. [DateTime](../data-types/datetime.md). + +**Example** + +Query: + +```sql +SELECT UTCTimestamp(); +``` + +Result: + +```response +┌──────UTCTimestamp()─┐ +│ 2024-05-28 08:32:09 │ +└─────────────────────┘ +``` + ## timeDiff Returns the difference between two dates or dates with time values. The difference is calculated in units of seconds. It is same as `dateDiff` and was added only for MySQL support. `dateDiff` is preferred. @@ -3159,8 +4164,8 @@ timeDiff(first_datetime, second_datetime) *Arguments** -- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) -- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md) +- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) +- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../data-types/datetime.md) **Returned value** diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 5f3514049c7..a455d0af91b 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -20,13 +20,11 @@ Alias: `normL1`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. - -Type: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Examples** @@ -58,13 +56,11 @@ Alias: `normL2`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). 
[Float](../data-types/float.md). **Example** @@ -95,13 +91,11 @@ Alias: `normL2Squared`. ***Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L2-norm squared. - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm squared. [Float](../data-types/float.md). **Example** @@ -133,13 +127,11 @@ Alias: `normLinf`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Linf-norm or the maximum absolute value. - -Type: [Float](../../sql-reference/data-types/float.md). +- Linf-norm or the maximum absolute value. [Float](../data-types/float.md). **Example** @@ -171,14 +163,12 @@ Alias: `normLp`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm) - -Type: [Float](../../sql-reference/data-types/float.md). +- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../data-types/float.md). **Example** @@ -210,14 +200,12 @@ Alias: `distanceL1`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 1-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 1-norm distance. [Float](../data-types/float.md). **Example** @@ -249,14 +237,12 @@ Alias: `distanceL2`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 2-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 2-norm distance. [Float](../data-types/float.md). **Example** @@ -288,12 +274,12 @@ Alias: `distanceL2Squared`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). 
+- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -Type: [Float](../../sql-reference/data-types/float.md). +- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../data-types/float.md). **Example** @@ -325,14 +311,12 @@ Alias: `distanceLinf`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector1` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector1` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Infinity-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- Infinity-norm distance. [Float](../data-types/float.md). **Example** @@ -364,15 +348,13 @@ Alias: `distanceLp`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- p-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- p-norm distance. [Float](../data-types/float.md). **Example** @@ -405,13 +387,11 @@ Alias: `normalizeL1`. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -443,13 +423,11 @@ Alias: `normalizeL1`. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -481,13 +459,11 @@ Alias: `normalizeLinf `. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -519,14 +495,12 @@ Alias: `normalizeLp `. **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `tuple` — [Tuple](../data-types/tuple.md). 
+- `p` — The power. Possible values: any number from [1;inf). [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -556,14 +530,12 @@ cosineDistance(vector1, vector2) **Arguments** -- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Cosine of the angle between two vectors subtracted from one. - -Type: [Float](../../sql-reference/data-types/float.md). +- Cosine of the angle between two vectors subtracted from one. [Float](../data-types/float.md). **Examples** diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 4f6da764b3c..24a95b0398b 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -18,13 +18,11 @@ char(number_1, [number_2, ..., number_n]); **Arguments** -- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). +- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../data-types/int-uint.md), [Float](../data-types/float.md). **Returned value** -- a string of given bytes. - -Type: `String`. +- a string of given bytes. [String](../data-types/string.md). **Example** @@ -88,23 +86,21 @@ The function is using uppercase letters `A-F` and not using any prefixes (like ` For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). +Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). -For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. 
+Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to hexadecimal. Types: [String](../data-types/string.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the hexadecimal representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the hexadecimal representation of the argument. [String](../data-types/string.md). **Examples** @@ -171,7 +167,7 @@ Performs the opposite operation of [hex](#hex). It interprets each pair of hexad If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions. -:::note +:::note If `unhex` is invoked from within the `clickhouse-client`, binary strings display using UTF-8. ::: @@ -185,15 +181,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Example** @@ -237,23 +231,21 @@ Alias: `BIN`. For integer arguments, it prints bin digits from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints eight digits of every byte if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). +Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). 
-For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to binary. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md), or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the binary representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the binary representation of the argument. [String](../data-types/string.md). **Examples** @@ -330,21 +322,19 @@ Alias: `UNBIN`. For a numeric argument `unbin()` does not return the inverse of `bin()`. If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) functions. -:::note +:::note If `unbin` is invoked from within the `clickhouse-client`, binary strings are displayed using UTF-8. ::: -Supports binary digits `0` and `1`. The number of binary digits does not have to be multiples of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isn’t thrown). +Supports binary digits `0` and `1`. The number of binary digits does not have to be multiples of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isn’t thrown). **Arguments** -- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of binary digits. [String](../data-types/string.md). **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Examples** @@ -396,13 +386,11 @@ bitPositionsToArray(arg) **Arguments** -- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `arg` — Integer value. 
[Int/UInt](../data-types/int-uint.md). **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -454,13 +442,11 @@ mortonEncode(args) **Parameters** -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. +- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -477,7 +463,7 @@ Result: ### Expanded mode -Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. +Accepts a range mask ([tuple](../data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../data-types/int-uint.md) as other arguments. Each number in the mask configures the amount of range expansion:
1 - no expansion
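For instance, assuming the mask semantics above (an entry of 1 leaves its argument unexpanded, larger entries scale it up), the constant mask tuple is passed ahead of the values. The query below is only an illustrative sketch with arbitrary values, not a reference result:

```sql
-- Sketch: leave the first argument unexpanded and expand the second one 4x.
-- The mask must be a constant tuple whose size matches the number of values.
SELECT mortonEncode((1, 4), 1024, 16) AS code;
```

Decoding `code` with the same mask (see `mortonDecode` below) should return the original pair `(1024, 16)`.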
@@ -494,15 +480,13 @@ mortonEncode(range_mask, args) **Parameters** - `range_mask`: 1-8. -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. +- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. -Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. +Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -595,7 +579,7 @@ Result: **implementation details** -Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. +Please note that you can fit only so many bits of information into Morton code as [UInt64](../data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. ## mortonDecode @@ -617,13 +601,11 @@ mortonDecode(tuple_size, code) **Parameters** - `tuple_size`: integer value no more than 8. -- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. +- `code`: [UInt64](../data-types/int-uint.md) code. **Returned value** -- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- [tuple](../data-types/tuple.md) of the specified size. [UInt64](../data-types/int-uint.md) **Example** @@ -644,7 +626,7 @@ Result: Accepts a range mask (tuple) as a first argument and the code as the second argument. Each number in the mask configures the amount of range shrink:
1 - no shrink
-2 - 2x shrink
+2 - 2x shrink
3 - 3x shrink
...
Up to 8x shrink.
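As a quick sanity check of the shrink semantics above, decoding with the same mask that was used for encoding should give the original arguments back; the values below are arbitrary and the query is only a sketch:

```sql
-- Sketch: encode two values with a (1, 2) mask, then decode the resulting
-- code with the same mask; the original pair should come back as a tuple.
SELECT mortonDecode((1, 2), mortonEncode((1, 2), 1024, 16)) AS roundtrip;
```

With matching masks, `roundtrip` should be `(1024, 16)`.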
@@ -719,6 +701,267 @@ Result: 1 2 3 4 5 6 7 8 ``` +## hilbertEncode + +Calculates code for Hilbert Curve for a list of unsigned integers. + +The function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Simple: accepts up to 2 unsigned integers as arguments and produces a UInt64 code. + +**Syntax** + +```sql +hilbertEncode(args) +``` + +**Parameters** + +- `args`: up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT hilbertEncode(3, 4); +``` +Result: + +```response +31 +``` + +### Expanded mode + +Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. + +Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range. + +**Syntax** + +```sql +hilbertEncode(range_mask, args) +``` + +**Parameters** +- `range_mask`: ([tuple](../../sql-reference/data-types/tuple.md)) +- `args`: up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) +**Example** +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). + +Query: + +```sql +SELECT hilbertEncode((10,6), 1024, 16); +``` + +Result: + +```response +4031541586602 +``` + +Note: tuple size must be equal to the number of the other arguments. + +**Example** + +For a single argument without a tuple, the function returns the argument itself as the Hilbert index, since no dimensional mapping is needed. + +Query: + +```sql +SELECT hilbertEncode(1); +``` + +Result: + +```response +1 +``` + +**Example** + +If a single argument is provided with a tuple specifying bit shifts, the function shifts the argument left by the specified number of bits. + +Query: + +```sql +SELECT hilbertEncode(tuple(2), 128); +``` + +Result: + +```response +512 +``` + +**Example** + +The function also accepts columns as arguments: + +Query: + +First create the table and insert some data. + +```sql +create table hilbert_numbers( + n1 UInt32, + n2 UInt32 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into hilbert_numbers (*) values(1,2); +``` +Use column names instead of constants as function arguments to `hilbertEncode` + +Query: + +```sql +SELECT hilbertEncode(n1, n2) FROM hilbert_numbers; +``` + +Result: + +```response +13 +``` + +**implementation details** + +Please note that you can fit only so many bits of information into Hilbert code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each. All overflow will be clamped to zero. + +## hilbertDecode + +Decodes a Hilbert curve index back into a tuple of unsigned integers, representing coordinates in multi-dimensional space. 
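Because decoding inverts `hilbertEncode`, feeding an encoded value straight back in should reproduce the original coordinates. A small sketch consistent with the `hilbertEncode(3, 4)` example above:

```sql
-- Sketch: hilbertEncode(3, 4) yields the curve index 31 (see the example
-- above); decoding that index for two dimensions should return (3, 4).
SELECT hilbertDecode(2, hilbertEncode(3, 4)) AS point;
```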
+ +As with the `hilbertEncode` function, this function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Accepts up to 2 unsigned integers as arguments and produces a UInt64 code. + +**Syntax** + +```sql +hilbertDecode(tuple_size, code) +``` + +**Parameters** +- `tuple_size`: integer value no more than 2. +- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. + +**Returned value** + +- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT hilbertDecode(2, 31); +``` + +Result: + +```response +["3", "4"] +``` + +### Expanded mode + +Accepts a range mask (tuple) as a first argument and up to 2 unsigned integers as other arguments. +Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range. + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). +As with the encode function, this is limited to 8 numbers at most. + +**Example** + +Hilbert code for one argument is always the argument itself (as a tuple). + +Query: + +```sql +SELECT hilbertDecode(1, 1); +``` + +Result: + +```response +["1"] +``` + +**Example** + +A single argument with a tuple specifying bit shifts will be right-shifted accordingly. + +Query: + +```sql +SELECT hilbertDecode(tuple(2), 32768); +``` + +Result: + +```response +["128"] +``` + +**Example** + +The function accepts a column of codes as a second argument: + +First create the table and insert some data. + +Query: +```sql +create table hilbert_numbers( + n1 UInt32, + n2 UInt32 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into hilbert_numbers (*) values(1,2); +``` +Use column names instead of constants as function arguments to `hilbertDecode` + +Query: + +```sql +select untuple(hilbertDecode(2, hilbertEncode(n1, n2))) from hilbert_numbers; +``` + +Result: + +```response +1 2 +``` diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 00c9ef376d3..5d82e26eb32 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -30,15 +30,15 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that need to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that need to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. 
[String](../data-types/string.md#string). +- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). **Examples** @@ -123,14 +123,14 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that needs to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). **Examples** @@ -230,15 +230,15 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../data-types/string.md#string). +- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** @@ -361,14 +361,14 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional. 
[String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional. [String](../data-types/string.md#string). **Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 4149afce044..82c21ce40c8 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -25,9 +25,9 @@ dictGetOrNull('dict_name', attr_name, id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. -- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. +- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. **Returned value** @@ -239,14 +239,12 @@ dictHas('dict_name', id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. **Returned value** -- 0, if there is no key. -- 1, if there is a key. - -Type: `UInt8`. +- 0, if there is no key. [UInt8](../data-types/int-uint.md). 
+- 1, if there is a key. [UInt8](../data-types/int-uint.md). ## dictGetHierarchy @@ -261,13 +259,11 @@ dictGetHierarchy('dict_name', key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** -- Parents for the key. - -Type: [Array(UInt64)](../../sql-reference/data-types/array.md). +- Parents for the key. [Array(UInt64)](../data-types/array.md). ## dictIsIn @@ -280,15 +276,13 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** -- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. -- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. - -Type: `UInt8`. +- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). +- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). ## dictGetChildren @@ -303,13 +297,11 @@ dictGetChildren(dict_name, key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned values** -- First-level descendants for the key. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- First-level descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -352,14 +344,12 @@ dictGetDescendants(dict_name, key, level) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). +- `key` — Key value. 
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../data-types/int-uint.md). **Returned values** -- Descendants for the key. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- Descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -419,8 +409,8 @@ dictGetAll('dict_name', attr_names, id_expr[, limit]) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `limit` - Maximum length for each value array returned. When truncating, child nodes are given precedence over parent nodes, and otherwise the defined list order for the regexp tree dictionary is respected. If unspecified, array length is unlimited. **Returned value** @@ -509,7 +499,7 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md) or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute. **Returned value** diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index d62cd1db88d..ac9e21cd416 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -19,7 +19,7 @@ file(path[, default]) **Arguments** - `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). 
Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. -- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). +- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 4dfbf4262ed..a0dfbebc8ae 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -12,6 +12,8 @@ Returns whether the argument is [NULL](../../sql-reference/syntax.md#null). See also operator [`IS NULL`](../operators/index.md#is_null). +**Syntax** + ``` sql isNull(x) ``` @@ -52,6 +54,45 @@ Result: └───┘ ``` +## isNullable + +Returns `1` if a column is [Nullable](../data-types/nullable.md) (i.e allows `NULL` values), `0` otherwise. + +**Syntax** + +``` sql +isNullable(x) +``` + +**Arguments** + +- `x` — column. + +**Returned value** + +- `1` if `x` allows `NULL` values. [UInt8](../data-types/int-uint.md). +- `0` if `x` does not allow `NULL` values. [UInt8](../data-types/int-uint.md). + +**Example** + +Query: + +``` sql +CREATE TABLE tab (ordinary_col UInt32, nullable_col Nullable(UInt32)) ENGINE = Log; +INSERT INTO tab (ordinary_col, nullable_col) VALUES (1,1), (2, 2), (3,3); +SELECT isNullable(ordinary_col), isNullable(nullable_col) FROM tab; +``` + +Result: + +``` text + ┌───isNullable(ordinary_col)──┬───isNullable(nullable_col)──┐ +1. │ 0 │ 1 │ +2. │ 0 │ 1 │ +3. │ 0 │ 1 │ + └─────────────────────────────┴─────────────────────────────┘ +``` + ## isNotNull Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal). @@ -96,6 +137,36 @@ Result: └───┘ ``` +## isNotDistinctFrom + +Performs null-safe comparison. Used to compare JOIN keys which contain NULL values in the JOIN ON section. +This function will consider two `NULL` values as identical and will return `true`, which is distinct from the usual +equals behavior where comparing two `NULL` values would return `NULL`. + +:::note +This function is an internal function used by the implementation of JOIN ON. Please do not use it manually in queries. +::: + +**Syntax** + +``` sql +isNotDistinctFrom(x, y) +``` + +**Arguments** + +- `x` — first JOIN key. +- `y` — second JOIN key. + +**Returned value** + +- `true` when `x` and `y` are both `NULL`. +- `false` otherwise. + +**Example** + +For a complete example see: [NULL values in JOIN keys](../../sql-reference/statements/select/join#null-values-in-join-keys). + ## isZeroOrNull Returns whether the argument is 0 (zero) or [NULL](../../sql-reference/syntax.md#null-literal). @@ -280,7 +351,7 @@ Result: ## assumeNotNull -Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. +Returns the corresponding non-`Nullable` value for a value of [Nullable](../data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. 
``` sql assumeNotNull(x) diff --git a/docs/en/sql-reference/functions/geo/coordinates.md b/docs/en/sql-reference/functions/geo/coordinates.md index 1cbc1933206..d10573b8995 100644 --- a/docs/en/sql-reference/functions/geo/coordinates.md +++ b/docs/en/sql-reference/functions/geo/coordinates.md @@ -152,8 +152,8 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...) **Input values** -- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers. -- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. +- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../data-types/tuple.md) — A tuple of two numbers. +- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. - The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons. **Returned values** diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index ce16af44e90..8abc8006e5d 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi **Arguments** -- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../data-types/int-uint.md). :::note All coordinate parameters must be of the same type: either `Float32` or `Float64`. @@ -86,11 +86,9 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64 **Returned values** -- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. +- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../data-types/array.md)([String](../../data-types/string.md)). 
- `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. -Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). - :::note Function throws an exception if resulting array is over 10’000’000 items long. ::: diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 29486c58e6a..bcdd457964a 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -26,14 +26,12 @@ h3IsValid(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- 1 — The number is a valid H3 index. -- 0 — The number is not a valid H3 index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The number is a valid H3 index. [UInt8](../../data-types/int-uint.md). +- 0 — The number is not a valid H3 index. [UInt8](../../data-types/int-uint.md). **Example** @@ -63,14 +61,12 @@ h3GetResolution(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Index resolution. Range: `[0, 15]`. -- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). +- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../data-types/int-uint.md). **Example** @@ -100,11 +96,11 @@ h3EdgeAngle(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../data-types/float.md). **Example** @@ -134,11 +130,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md). **Example** @@ -168,11 +164,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md). **Example** @@ -202,16 +198,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. 
Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. [Float64](../../data-types/float.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Hexagon index number. -- 0 in case of error. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. [UInt64](../../data-types/int-uint.md). +- 0 in case of error. [UInt64](../../data-types/int-uint.md). **Example** @@ -241,11 +235,11 @@ h3ToGeo(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../data-types/float.md). `lat` — Latitude. [Float64](../../data-types/float.md). **Example** @@ -275,12 +269,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. -Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** @@ -311,14 +304,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `k` — Radius. [integer](../../data-types/int-uint.md) **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -354,13 +345,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Hexagon base cell number. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. [UInt8](../../data-types/int-uint.md). **Example** @@ -390,13 +379,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square meters. [Float64](../../data-types/float.md). 
**Example** @@ -426,13 +413,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square kilometers. [Float64](../../data-types/float.md). **Example** @@ -462,15 +447,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `index2` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. -- `0` — Indexes are not neighbours. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. [UInt8](../../data-types/int-uint.md). +- `0` — Indexes are not neighbours. [UInt8](../../data-types/int-uint.md). **Example** @@ -500,14 +483,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of the child H3-indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -537,14 +518,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Parent H3 index. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Parent H3 index. [UInt64](../../data-types/int-uint.md). **Example** @@ -572,13 +551,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- String representation of the H3 index. - -Type: [String](../../../sql-reference/data-types/string.md). +- String representation of the H3 index. [String](../../data-types/string.md). **Example** @@ -608,11 +585,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. [String](../../data-types/string.md). **Returned value** -- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. [UInt64](../../data-types/int-uint.md). 
**Example** @@ -642,11 +619,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Example** @@ -676,14 +653,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. -- `0` — Index doesn't have a resolution with Class III orientation. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index has a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). **Example** @@ -713,14 +688,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. -- `0` — Index doesn't represent a pentagonal cell. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index represents a pentagonal cell. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../data-types/int-uint.md). **Example** @@ -750,13 +723,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array containing icosahedron faces intersected by a given H3 index. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -786,13 +757,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square meters. [Float64](../../data-types/float.md). **Example** @@ -822,13 +791,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square radians. [Float64](../../data-types/float.md). **Example** @@ -858,14 +825,12 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). 
+- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md). **Example** @@ -895,13 +860,11 @@ h3ExactEdgeLengthM(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in meters. [Float64](../../data-types/float.md). **Example** @@ -931,13 +894,11 @@ h3ExactEdgeLengthKm(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in kilometers. [Float64](../../data-types/float.md). **Example** @@ -967,13 +928,11 @@ h3ExactEdgeLengthRads(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in radians. [Float64](../../data-types/float.md). **Example** @@ -1003,13 +962,11 @@ h3NumHexagons(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Number of H3 indices. - -Type: [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of H3 indices. [Int64](../../data-types/int-uint.md). **Example** @@ -1039,14 +996,12 @@ h3PointDistM(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in meters.[Float64](../../data-types/float.md). **Example** @@ -1076,14 +1031,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). 
**Returned values** -- Haversine or great circle distance in kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in kilometers. [Float64](../../data-types/float.md). **Example** @@ -1113,14 +1066,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in radians. [Float64](../../data-types/float.md). **Example** @@ -1150,9 +1101,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all the resolution 0 H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1183,13 +1132,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all pentagon H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1219,14 +1166,12 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). **Returned value** -Array of h3 indexes representing the line of indices between the two provided indices: - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1256,14 +1201,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). **Returned value** -- Number of grid cells. - -Type: [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of grid cells. 
[Int64](../../data-types/int-uint.md). Returns a negative number if finding the distance fails. @@ -1297,14 +1240,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. [UInt64](../../data-types/int-uint.md). +- `k` — Distance. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1334,14 +1275,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Unidirectional Edge Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1371,14 +1310,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. -- 0 — The H3 index is not a valid unidirectional edge. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). +- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). **Example** @@ -1408,13 +1345,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Origin Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1444,13 +1379,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Destination Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Destination Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1480,14 +1413,14 @@ h3GetIndexesFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. 
[UInt64](../../data-types/int-uint.md). **Returned value** A tuple consisting of two values `tuple(origin,destination)`: -- `origin` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destination` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `origin` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destination` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). Returns `(0,0)` if the provided input is not valid. @@ -1519,13 +1452,11 @@ h3GetUnidirectionalEdgesFromHexagon(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -Array of h3 indexes representing each unidirectional edge: - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing each unidirectional edge. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1555,12 +1486,11 @@ h3GetUnidirectionalEdgeBoundary(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Array of pairs '(lon, lat)'. - Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index f4702eff44b..3165b21318b 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -21,14 +21,12 @@ geoToS2(lon, lat) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. [Float64](../../data-types/float.md). **Returned values** -- S2 point index. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- S2 point index. [UInt64](../../data-types/int-uint.md). **Example** @@ -58,13 +56,13 @@ s2ToGeo(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. - -Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md). +- A [tuple](../../data-types/tuple.md) consisting of two values: + - `lon`. [Float64](../../data-types/float.md). + - `lat`. [Float64](../../data-types/float.md). **Example** @@ -94,13 +92,11 @@ s2GetNeighbors(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). 
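For orientation, a hedged sketch combining `geoToS2` and `s2GetNeighbors` from this hunk, shown here before the documented return value; the coordinates are arbitrary illustrative values:

```sql
-- Derive an S2 point index from (lon, lat) and list the four neighbouring cells.
SELECT s2GetNeighbors(geoToS2(37.79506683, 55.71290588)) AS neighbors;
```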
-**Returned values** +**Returned value** -- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -130,14 +126,12 @@ s2CellsIntersect(s2index1, s2index2) **Arguments** -- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index1`, `s2index2` — S2 Index. [UInt64](../../data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the cells intersect. -- 0 — If the cells don't intersect. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cells intersect. [UInt8](../../data-types/int-uint.md). +- `0` — If the cells don't intersect. [UInt8](../../data-types/int-uint.md). **Example** @@ -167,16 +161,14 @@ s2CapContains(center, degrees, point) **Arguments** -- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `center` — S2 point index corresponding to the cap. [UInt64](../../data-types/int-uint.md). +- `degrees` — Radius of the cap in degrees. [Float64](../../data-types/float.md). +- `point` — S2 point index. [UInt64](../../data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the cap contains the S2 point index. -- 0 — If the cap doesn't contain the S2 point index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cap contains the S2 point index. [UInt8](../../data-types/int-uint.md). +- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../data-types/int-uint.md). **Example** @@ -206,13 +198,13 @@ s2CapUnion(center1, radius1, center2, radius2) **Arguments** -- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../data-types/float.md). **Returned values** -- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md). +- `center` — S2 point index corresponding to the center of the smallest cap containing the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../data-types/float.md). **Example** @@ -242,14 +234,14 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/int-uint.md). **Example** @@ -279,14 +271,14 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index. [UInt64](../../data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the rectangle contains the given S2 point. -- 0 — If the rectangle doesn't contain the given S2 point. +- `1` — If the rectangle contains the given S2 point. +- `0` — If the rectangle doesn't contain the given S2 point. **Example** @@ -316,13 +308,13 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi) **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md).
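To give the rectangle functions in this hunk some context, a hedged sketch follows; the corner coordinates are arbitrary illustrative values, not drawn from the file's own examples:

```sql
-- Build two lat/lon rectangles from corner points and compute the bound of their union.
SELECT s2RectUnion(
    geoToS2(-122.3, 37.5), geoToS2(-122.0, 37.8),  -- first rectangle: low and high corners
    geoToS2(-122.2, 37.6), geoToS2(-121.9, 37.9)   -- second rectangle: low and high corners
) AS union_rect;
```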
**Example** @@ -352,13 +344,13 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/geo/svg.md b/docs/en/sql-reference/functions/geo/svg.md index c565d1f9de7..320d4542fee 100644 --- a/docs/en/sql-reference/functions/geo/svg.md +++ b/docs/en/sql-reference/functions/geo/svg.md @@ -23,13 +23,11 @@ Aliases: `SVG`, `svg` **Returned value** -- The SVG representation of the geometry: +- The SVG representation of the geometry. [String](../../data-types/string). - SVG circle - SVG polygon - SVG path -Type: [String](../../data-types/string) - **Examples** **Circle** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 1cd7eeb7c83..506114038f7 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -12,7 +12,7 @@ Simhash is a hash function, which returns close hash values for close (similar) ## halfMD5 -[Interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. +[Interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ```sql halfMD5(par1, ...) @@ -23,11 +23,11 @@ Consider using the [sipHash64](#siphash64) function instead. **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). 
For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -61,7 +61,7 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#md5) hash function. -The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: +The function [interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: 1. The first and the second hash value are concatenated to an array which is hashed. 2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. @@ -69,11 +69,11 @@ The function [interprets](/docs/en/sql-reference/functions/type-conversion-funct **Arguments** -The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md). +The function takes a variable number of input parameters of any of the [supported data types](../data-types/index.md). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. Note that the calculated hash values may be equal for the same input values of different argument types. This affects for example integer types of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data. @@ -105,7 +105,7 @@ Same as [sipHash64](#siphash64), but the first argument is a tuple of two UInt64 **Returned value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -143,7 +143,7 @@ Same as for [sipHash64](#siphash64). **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -183,7 +183,7 @@ Same as [sipHash128](#siphash128), but the first argument is a tuple of two UInt **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -217,7 +217,7 @@ Same as for [sipHash128](#siphash128). 
**Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -251,7 +251,7 @@ Same as [sipHash128Reference](#siphash128reference), but the first argument is a **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -283,11 +283,11 @@ Note that Google changed the algorithm of CityHash after it has been added to Cl **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Examples** @@ -321,7 +321,7 @@ It works faster than intHash32. Average quality. ## SHA1, SHA224, SHA256, SHA512, SHA512_256 -Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). **Syntax** @@ -337,17 +337,15 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Arguments** -- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- `s` — Input string for SHA hash calculation. [String](../data-types/string.md). **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). - -Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](../data-types/fixedstring.md). **Example** -Use the [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. +Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. Query: @@ -365,7 +363,7 @@ Result: ## BLAKE3 -Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). 
**Syntax** @@ -377,17 +375,15 @@ This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust **Arguments** -- s - input string for BLAKE3 hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- s - input string for BLAKE3 hash calculation. [String](../data-types/string.md). **Return value** -- BLAKE3 hash as a byte array with type FixedString(32). - -Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](../data-types/fixedstring.md). **Example** -Use function [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. +Use function [hex](../functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. Query: ```sql @@ -423,11 +419,11 @@ These functions use the `Fingerprint64` and `Hash64` methods respectively from a **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -540,9 +536,7 @@ This is just [JavaHash](#javahash) with zeroed out sign bit. This function is us **Returned value** -A `Int32` data type hash value. - -Type: `hiveHash`. +- `hiveHash` hash value. [Int32](../data-types/int-uint.md). **Example** @@ -570,11 +564,11 @@ metroHash64(par1, ...) **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -608,12 +602,12 @@ Alias: `yandexConsistentHash` (left for backwards compatibility sake). **Parameters** -- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md). -- `n`: Number of buckets. 
[UInt16](/docs/en/sql-reference/data-types/int-uint.md). +- `input`: A UInt64-type key [UInt64](../data-types/int-uint.md). +- `n`: Number of buckets. [UInt16](../data-types/int-uint.md). **Returned value** -- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- A [UInt16](../data-types/int-uint.md) data type hash value. **Implementation details** @@ -644,12 +638,12 @@ murmurHash2_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. -- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_32` function returns hash value having the [UInt32](../data-types/int-uint.md) data type. +- The `murmurHash2_64` function returns hash value having the [UInt64](../data-types/int-uint.md) data type. **Example** @@ -675,13 +669,11 @@ gccMurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -714,13 +706,11 @@ MurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. - -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt32](../data-types/int-uint.md). **Example** @@ -751,12 +741,12 @@ murmurHash3_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). 
For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. -- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- The `murmurHash3_32` function returns a [UInt32](../data-types/int-uint.md) data type hash value. +- The `murmurHash3_64` function returns a [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -782,13 +772,11 @@ murmurHash3_128(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions). [String](../data-types/string.md). **Returned value** -A 128-bit `MurmurHash3` hash value. - -Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `MurmurHash3` hash value. [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -818,13 +806,11 @@ xxh3(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions) of any data type. **Returned value** -A 64-bit `xxh3` hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +A 64-bit `xxh3` hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -856,9 +842,11 @@ SELECT xxHash64('') **Returned value** -A `UInt32` or `UInt64` data type hash value. +- Hash value. [UInt32/64](../data-types/int-uint.md). -Type: `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. +:::note +The return type will be `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. +::: **Example** @@ -884,7 +872,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -894,14 +882,12 @@ ngramSimHash(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). 
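As a supplement to the hunk above, a minimal sketch of the near-duplicate workflow the description refers to; the two strings are arbitrary illustrative values:

```sql
-- A small Hamming distance between the simhashes suggests the two strings are near-duplicates.
SELECT bitHammingDistance(
    ngramSimHash('ClickHouse is a column-oriented database'),
    ngramSimHash('ClickHouse is a column oriented database')
) AS distance;
```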
**Example** @@ -923,7 +909,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -933,14 +919,12 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -962,7 +946,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -972,14 +956,12 @@ ngramSimHashUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1001,7 +983,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. 
+Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1011,14 +993,12 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1040,7 +1020,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1050,14 +1030,12 @@ wordShingleSimHash(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1079,7 +1057,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1089,14 +1067,12 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). 
-- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1118,7 +1094,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1128,14 +1104,12 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1157,7 +1131,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1167,14 +1141,12 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). 
+- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1204,13 +1176,11 @@ wyHash64(string) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `string` — String. [String](../data-types/string.md). **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1232,7 +1202,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1242,15 +1212,13 @@ ngramMinHash(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1272,7 +1240,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. 
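A hedged sketch of the `tupleHammingDistance` comparison described in the paragraph above; the strings are arbitrary illustrative values:

```sql
-- The returned tuples are compared element-wise; matching hashes indicate near-duplicate strings.
SELECT tupleHammingDistance(
    ngramMinHashCaseInsensitive('ClickHouse is a column-oriented database'),
    ngramMinHashCaseInsensitive('CLICKHOUSE IS A COLUMN-ORIENTED DATABASE')
) AS distance;
```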
**Syntax** @@ -1282,15 +1250,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1312,7 +1278,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1322,15 +1288,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. 
[Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1352,7 +1316,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1362,15 +1326,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1400,15 +1362,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. 
- -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1438,15 +1398,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1476,15 +1434,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. 
[Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1514,15 +1470,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1544,7 +1498,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1554,15 +1508,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. 
[UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1584,7 +1536,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1594,15 +1546,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1624,7 +1574,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). 
For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1634,15 +1584,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1664,7 +1612,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1674,15 +1622,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. 
- -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1712,15 +1658,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1750,15 +1694,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). 
**Example** @@ -1788,15 +1730,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1826,15 +1766,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1872,7 +1810,7 @@ Alias: `sqid` **Returned Value** -A sqid [String](/docs/en/sql-reference/data-types/string.md). +A sqid [String](../data-types/string.md). 
**Example** @@ -1899,11 +1837,11 @@ sqidDecode(sqid) **Arguments** -- A sqid - [String](/docs/en/sql-reference/data-types/string.md) +- A sqid - [String](../data-types/string.md) **Returned Value** -The sqid transformed to numbers [Array(UInt64)](/docs/en/sql-reference/data-types/array.md). +The sqid transformed to numbers [Array(UInt64)](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index d07a5292431..c0256ba4735 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -11,7 +11,7 @@ There are at least\* two types of functions - regular functions (they are just c In this section we discuss regular functions. For aggregate functions, see the section “Aggregate functions”. :::note -There is a third type of function that the [‘arrayJoin’ function](/docs/en/sql-reference/functions/array-join.md) belongs to. And [table functions](/docs/en/sql-reference/table-functions/index.md) can also be mentioned separately. +There is a third type of function that the [‘arrayJoin’ function](../functions/array-join.md) belongs to. And [table functions](../table-functions/index.md) can also be mentioned separately. ::: ## Strong Typing @@ -63,4 +63,4 @@ For some functions the first argument (the lambda function) can be omitted. In t ## User Defined Functions (UDFs) -ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md). +ClickHouse supports user-defined functions. See [UDFs](../functions/udf.md). diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 1025b8bdc3d..bec97208843 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -36,16 +36,13 @@ addressToLine(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** - Source code filename and the line number in this file delimited by colon. - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. - - Name of a binary, if the function couldn’t find the debug information. - - Empty string, if the address is not valid. Type: [String](../../sql-reference/data-types/string.md). @@ -117,9 +114,11 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so ## addressToLineWithInlines -Similar to `addressToLine`, but it will return an Array with all inline functions, and will be much slower as a price. +Similar to `addressToLine`, but returns an Array with all inline functions. As a result of this, it is slower than `addressToLine`. +:::note If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package. +::: **Syntax** @@ -129,17 +128,11 @@ addressToLineWithInlines(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Array which first element is source code filename and the line number in this file delimited by colon. 
And from second element, inline functions' source code filename and line number and function name are listed. - -- Array with single element which is name of a binary, if the function couldn’t find the debug information. - -- Empty array, if the address is not valid. - -Type: [Array(String)](../../sql-reference/data-types/array.md). +- An array whose first element is the source code filename and line number in the file delimited by a colon. From the second element onwards, inline functions' source code filenames, line numbers and function names are listed. If the function couldn’t find the debug information, then an array with a single element equal to the name of the binary is returned, otherwise an empty array is returned if the address is not valid. [Array(String)](../data-types/array.md). **Example** @@ -232,14 +225,12 @@ addressToSymbol(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Symbol from ClickHouse object files. -- Empty string, if the address is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Symbol from ClickHouse object files. [String](../data-types/string.md). +- Empty string, if the address is not valid. [String](../data-types/string.md). **Example** @@ -329,14 +320,11 @@ demangle(symbol) **Arguments** -- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. +- `symbol` ([String](../data-types/string.md)) — Symbol from an object file. **Returned value** -- Name of the C++ function. -- Empty string if a symbol is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Name of the C++ function, or an empty string if the symbol is not valid. [String](../data-types/string.md). **Example** @@ -425,7 +413,7 @@ tid() **Returned value** -- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Current thread id. [Uint64](../data-types/int-uint.md#uint-ranges). **Example** @@ -455,7 +443,7 @@ logTrace('message') **Arguments** -- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). +- `message` — Message that is emitted to server log. [String](../data-types/string.md#string). **Returned value** diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index be20e02d77e..5b6a3aef2c8 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -147,13 +147,11 @@ IPv6StringToNum(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- IPv6 address in binary format. - -Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). +- IPv6 address in binary format. [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -248,7 +246,7 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32); ## toIPv4(string) -An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../../sql-reference/data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. 
+An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. ``` sql WITH @@ -296,7 +294,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null ## toIPv6 -Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. +Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format. If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned. @@ -309,13 +307,11 @@ toIPv6(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md) +- `string` — IP address. [String](../data-types/string.md) **Returned value** -- IP address. - -Type: [IPv6](../../sql-reference/data-types/ipv6.md). +- IP address. [IPv6](../data-types/ipv6.md). **Examples** @@ -370,13 +366,11 @@ isIPv4String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -408,13 +402,11 @@ isIPv6String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -449,14 +441,12 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). -- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../data-types/string.md). +- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../data-types/string.md). **Returned value** -- `1` or `0`. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` or `0`. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e920ab82988..5d73c9a83b3 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -4,13 +4,13 @@ sidebar_position: 105 sidebar_label: JSON --- -There are two sets of functions to parse JSON. - - `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. - - `JSONExtract*` is made to parse normal JSON. +There are two sets of functions to parse JSON: + - [`simpleJSON*` (`visitParam*`)](#simplejson--visitparam-functions) which is made for parsing a limited subset of JSON extremely fast. + - [`JSONExtract*`](#jsonextract-functions) which is made for parsing ordinary JSON. 
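To make the trade-off between the two families listed above concrete, here is a small hedged sketch (the JSON literal is invented for this example):

```sql
-- simpleJSON* returns the first occurrence of the key found anywhere in the document,
-- while JSONExtract* can follow an explicit path through nested objects.
SELECT
    simpleJSONExtractString('{"a": "first", "b": {"a": "second"}}', 'a') AS fast_subset,  -- 'first'
    JSONExtractString('{"a": "first", "b": {"a": "second"}}', 'b', 'a')  AS full_parser;  -- 'second'
```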
-# simpleJSON/visitParam functions +## simpleJSON / visitParam functions -ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. +ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be. They try to do as little as possible to get the job done as quickly as possible. The following assumptions are made: @@ -19,7 +19,7 @@ The following assumptions are made: 3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used. 4. The JSON does not have space characters outside of string literals. -## simpleJSONHas +### simpleJSONHas Checks whether there is a field named `field_name`. The result is `UInt8`. @@ -29,14 +29,16 @@ Checks whether there is a field named `field_name`. The result is `UInt8`. simpleJSONHas(json, field_name) ``` +Alias: `visitParamHas`. + **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) -- `field_name`: The name of the field to search for. [String literal](../syntax#string) +- `json` — The JSON in which the field is searched for. [String](../data-types/string.md#string) +- `field_name` — The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns `1` if the field exists, `0` otherwise. +- Returns `1` if the field exists, `0` otherwise. [UInt8](../data-types/int-uint.md). **Example** @@ -55,11 +57,13 @@ SELECT simpleJSONHas(json, 'foo') FROM jsons; SELECT simpleJSONHas(json, 'bar') FROM jsons; ``` +Result: + ```response 1 0 ``` -## simpleJSONExtractUInt +### simpleJSONExtractUInt Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. @@ -69,14 +73,16 @@ Parses `UInt64` from the value of the field named `field_name`. If this is a str simpleJSONExtractUInt(json, field_name) ``` +Alias: `visitParamExtractUInt`. + **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) -- `field_name`: The name of the field to search for. [String literal](../syntax#string) +- `json` — The JSON in which the field is searched for. [String](../data-types/string.md#string) +- `field_name` — The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. +- Returns the number parsed from the field if the field exists and contains a number, `0` otherwise. [UInt64](../data-types/int-uint.md). **Example** @@ -98,6 +104,8 @@ INSERT INTO jsons VALUES ('{"baz":2}'); SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json; ``` +Result: + ```response 0 4 @@ -106,7 +114,7 @@ SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json; 5 ``` -## simpleJSONExtractInt +### simpleJSONExtractInt Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. 
@@ -116,14 +124,16 @@ Parses `Int64` from the value of the field named `field_name`. If this is a stri simpleJSONExtractInt(json, field_name) ``` +Alias: `visitParamExtractInt`. + **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) -- `field_name`: The name of the field to search for. [String literal](../syntax#string) +- `json` — The JSON in which the field is searched for. [String](../data-types/string.md#string) +- `field_name` — The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. +- Returns the number parsed from the field if the field exists and contains a number, `0` otherwise. [Int64](../data-types/int-uint.md). **Example** @@ -145,6 +155,8 @@ INSERT INTO jsons VALUES ('{"baz":2}'); SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json; ``` +Result: + ```response 0 -4 @@ -153,7 +165,7 @@ SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json; 5 ``` -## simpleJSONExtractFloat +### simpleJSONExtractFloat Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. @@ -163,14 +175,16 @@ Parses `Float64` from the value of the field named `field_name`. If this is a st simpleJSONExtractFloat(json, field_name) ``` +Alias: `visitParamExtractFloat`. + **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) -- `field_name`: The name of the field to search for. [String literal](../syntax#string) +- `json` — The JSON in which the field is searched for. [String](../data-types/string.md#string) +- `field_name` — The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. +- Returns the number parsed from the field if the field exists and contains a number, `0` otherwise. [Float64](../data-types/float.md/#float32-float64). **Example** @@ -192,6 +206,8 @@ INSERT INTO jsons VALUES ('{"baz":2}'); SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json; ``` +Result: + ```response 0 -4000 @@ -200,7 +216,7 @@ SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json; 5 ``` -## simpleJSONExtractBool +### simpleJSONExtractBool Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`. @@ -210,10 +226,12 @@ Parses a true/false value from the value of the field named `field_name`. The re simpleJSONExtractBool(json, field_name) ``` +Alias: `visitParamExtractBool`. + **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) -- `field_name`: The name of the field to search for. [String literal](../syntax#string) +- `json` — The JSON in which the field is searched for. [String](../data-types/string.md#string) +- `field_name` — The name of the field to search for. 
[String literal](../syntax#string) **Returned value** @@ -240,6 +258,8 @@ SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json; SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json; ``` +Result: + ```response 0 1 @@ -247,7 +267,7 @@ SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json; 0 ``` -## simpleJSONExtractRaw +### simpleJSONExtractRaw Returns the value of the field named `field_name` as a `String`, including separators. @@ -257,14 +277,16 @@ Returns the value of the field named `field_name` as a `String`, including separ simpleJSONExtractRaw(json, field_name) ``` +Alias: `visitParamExtractRaw`. + **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) -- `field_name`: The name of the field to search for. [String literal](../syntax#string) +- `json` — The JSON in which the field is searched for. [String](../data-types/string.md#string) +- `field_name` — The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. +- Returns the value of the field as a string, including separators if the field exists, or an empty string otherwise. [`String`](../data-types/string.md#string) **Example** @@ -286,6 +308,8 @@ INSERT INTO jsons VALUES ('{"baz":2}'); SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json; ``` +Result: + ```response "-4e3" @@ -294,7 +318,7 @@ SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json; {"def":[1,2,3]} ``` -## simpleJSONExtractString +### simpleJSONExtractString Parses `String` in double quotes from the value of the field named `field_name`. @@ -304,14 +328,16 @@ Parses `String` in double quotes from the value of the field named `field_name`. simpleJSONExtractString(json, field_name) ``` +Alias: `visitParamExtractString`. + **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) -- `field_name`: The name of the field to search for. [String literal](../syntax#string) +- `json` — The JSON in which the field is searched for. [String](../data-types/string.md#string) +- `field_name` — The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. +- Returns the unescaped value of a field as a string, including separators. An empty string is returned if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. [String](../data-types/string.md). **Implementation details** @@ -336,6 +362,8 @@ INSERT INTO jsons VALUES ('{"foo":"hello}'); SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json; ``` +Result: + ```response \n\0 @@ -343,73 +371,61 @@ SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json; ``` -## visitParamHas +## JSONExtract functions -This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas). +The following functions are based on [simdjson](https://github.com/lemire/simdjson), and designed for more complex JSON parsing requirements. 
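As a hedged sketch of the kind of request these simdjson-based functions are meant for (the JSON literal below is made up), one could validate a document first and then pull a typed value out of a nested array, which the `simpleJSON*` family is not designed to do:

```sql
-- isValidJSON guards against malformed input; JSONExtract then reads a typed
-- array from a nested path in a single call.
SELECT
    isValidJSON('{"user": {"scores": [7, 11, 13]}}') AS valid,                                     -- 1
    JSONExtract('{"user": {"scores": [7, 11, 13]}}', 'user', 'scores', 'Array(UInt8)') AS scores;  -- [7, 11, 13]
```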
-## visitParamExtractUInt +### isValidJSON -This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint). +Checks that passed string is valid JSON. -## visitParamExtractInt +**Syntax** -This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint). +```sql +isValidJSON(json) +``` -## visitParamExtractFloat - -This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat). - -## visitParamExtractBool - -This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool). - -## visitParamExtractRaw - -This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw). - -## visitParamExtractString - -This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring). - -# JSONExtract functions - -The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements. - -## isValidJSON(json) - -Checks that passed string is a valid json. - -Examples: +**Examples** ``` sql SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) +### JSONHas -If the value exists in the JSON document, `1` will be returned. +If the value exists in the JSON document, `1` will be returned. If the value does not exist, `0` will be returned. -If the value does not exist, `0` will be returned. +**Syntax** -Examples: +```sql +JSONHas(json [, indices_or_keys]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns `1` if the value exists in `json`, otherwise `0`. [UInt8](../data-types/int-uint.md). + +**Examples** + +Query: ``` sql SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 1 SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4) = 0 ``` -`indices_or_keys` is a list of zero or more arguments each of them can be either string or integer. - -- String = access object member by key. -- Positive integer = access the n-th member/key from the beginning. -- Negative integer = access the n-th member/key from the end. - -Minimum index of the element is 1. Thus the element 0 does not exist. - -You may use integers to access both JSON arrays and JSON objects. - -So, for example: +The minimum index of the element is 1. Thus the element 0 does not exist. You may use integers to access both JSON arrays and JSON objects. For example: ``` sql SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'a' @@ -419,26 +435,62 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) +### JSONLength -Return the length of a JSON array or a JSON object. +Return the length of a JSON array or a JSON object. If the value does not exist or has the wrong type, `0` will be returned. -If the value does not exist or has a wrong type, `0` will be returned. 
+**Syntax** -Examples: +```sql +JSONLength(json [, indices_or_keys]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns the length of the JSON array or JSON object. Returns `0` if the value does not exist or has the wrong type. [UInt64](../data-types/int-uint.md). + +**Examples** ``` sql SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) +### JSONType -Return the type of a JSON value. +Return the type of a JSON value. If the value does not exist, `Null` will be returned. -If the value does not exist, `Null` will be returned. +**Syntax** -Examples: +```sql +JSONType(json [, indices_or_keys]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns the type of a JSON value as a string. If the value does not exist, it returns `Null`. [String](../data-types/string.md). + +**Examples** ``` sql SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}') = 'Object' @@ -446,35 +498,191 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) +### JSONExtractUInt -## JSONExtractInt(json\[, indices_or_keys\]…) +Parses JSON and extracts a value of UInt type. -## JSONExtractFloat(json\[, indices_or_keys\]…) +**Syntax** -## JSONExtractBool(json\[, indices_or_keys\]…) - -Parses a JSON and extract a value. These functions are similar to `visitParam` functions. - -If the value does not exist or has a wrong type, `0` will be returned. - -Examples: - -``` sql -SELECT JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 -SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200.0 -SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 +```sql +JSONExtractUInt(json [, indices_or_keys]...) ``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end.
-The value is unescaped. If unescaping failed, it returns an empty string. +**Returned value** -Examples: +- Returns a UInt value if it exists, otherwise it returns `Null`. [UInt64](../data-types/string.md). + +**Examples** + +Query: + +``` sql +SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) as x, toTypeName(x); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┐ +│ 300 │ UInt64 │ +└─────┴───────────────┘ +``` + +### JSONExtractInt + +Parses JSON and extracts a value of Int type. + +**Syntax** + +```sql +JSONExtractInt(json [, indices_or_keys]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns an Int value if it exists, otherwise it returns `Null`. [Int64](../data-types/int-uint.md). + +**Examples** + +Query: + +``` sql +SELECT JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) as x, toTypeName(x); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┐ +│ 300 │ Int64 │ +└─────┴───────────────┘ +``` + +### JSONExtractFloat + +Parses JSON and extracts a value of Int type. + +**Syntax** + +```sql +JSONExtractFloat(json [, indices_or_keys]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns an Float value if it exists, otherwise it returns `Null`. [Float64](../data-types/float.md). + +**Examples** + +Query: + +``` sql +SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) as x, toTypeName(x); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┐ +│ 200 │ Float64 │ +└─────┴───────────────┘ +``` + +### JSONExtractBool + +Parses JSON and extracts a boolean value. If the value does not exist or has a wrong type, `0` will be returned. + +**Syntax** + +```sql +JSONExtractBool(json\[, indices_or_keys\]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns a Boolean value if it exists, otherwise it returns `0`. [Bool](../data-types/boolean.md). + +**Example** + +Query: + +``` sql +SELECT JSONExtractBool('{"passed": true}', 'passed'); +``` + +Result: + +```response +┌─JSONExtractBool('{"passed": true}', 'passed')─┐ +│ 1 │ +└───────────────────────────────────────────────┘ +``` + +### JSONExtractString + +Parses JSON and extracts a string. 
This function is similar to the [`visitParamExtractString`](#simplejsonextractstring) function. If the value does not exist or has a wrong type, an empty string will be returned. +**Syntax** + +```sql +JSONExtractString(json [, indices_or_keys]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns an unescaped string from `json`. If unescaping failed, if the value does not exist or if it has a wrong type then it returns an empty string. [String](../data-types/string.md). + +**Examples** ``` sql SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'hello' @@ -484,16 +692,35 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) +### JSONExtract -Parses a JSON and extract a value of the given ClickHouse data type. +Parses JSON and extracts a value of the given ClickHouse data type. This function is a generalized version of the previous `JSONExtract` functions: `JSONExtract(..., 'String')` returns exactly the same as `JSONExtractString()`, and `JSONExtract(..., 'Float64')` returns exactly the same as `JSONExtractFloat()`. -This is a generalization of the previous `JSONExtract` functions. -This means `JSONExtract(..., 'String')` returns exactly the same as `JSONExtractString()`, `JSONExtract(..., 'Float64')` returns exactly the same as `JSONExtractFloat()`. -Examples: +**Syntax** + +```sql +JSONExtract(json [, indices_or_keys...], return_type) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). +- `return_type` — A string specifying the type of the value to extract. [String](../data-types/string.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns a value of the specified return type if it exists, otherwise it returns `0`, `Null`, or an empty string, depending on the specified return type. [UInt64](../data-types/int-uint.md), [Int64](../data-types/int-uint.md), [Float64](../data-types/float.md), [Bool](../data-types/boolean.md) or [String](../data-types/string.md). + +**Examples** ``` sql SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, Array(Float64))') = ('hello',[-100,200,300]) @@ -506,17 +733,38 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) +### JSONExtractKeysAndValues -Parses key-value pairs from a JSON where the values are of the given ClickHouse data type. +Parses key-value pairs from JSON where the values are of the given ClickHouse data type.
-Example: +**Syntax** + +```sql +JSONExtractKeysAndValues(json [, indices_or_keys...], value_type) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). +- `value_type` — A string specifying the type of the value to extract. [String](../data-types/string.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns an array of parsed key-value pairs. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)(`value_type`)). + +**Example** ``` sql SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8') = [('a',5),('b',7),('c',11)]; ``` -## JSONExtractKeys +### JSONExtractKeys Parses a JSON string and extracts the keys. @@ -526,16 +774,14 @@ Parses a JSON string and extracts the keys. JSONExtractKeys(json[, a, b, c...]) ``` -**Arguments** +**Parameters** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../data-types/string.md) to get the field by the key or an [Integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned value** -Array with the keys of the JSON. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- Returns an array with the keys of the JSON. [Array](../data-types/array.md)([String](../data-types/string.md)). **Example** @@ -554,31 +800,67 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) +### JSONExtractRaw -Returns a part of JSON as unparsed string. +Returns part of the JSON as an unparsed string. If the part does not exist or has the wrong type, an empty string will be returned. -If the part does not exist or has a wrong type, an empty string will be returned. +**Syntax** -Example: +```sql +JSONExtractRaw(json [, indices_or_keys]...) +``` + +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns part of the JSON as an unparsed string. 
If the part does not exist or has the wrong type, an empty string is returned. [String](../data-types/string.md). + +**Example** ``` sql SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys…\]) +### JSONExtractArrayRaw -Returns an array with elements of JSON array, each represented as unparsed string. +Returns an array with elements of JSON array, each represented as unparsed string. If the part does not exist or isn’t an array, then an empty array will be returned. -If the part does not exist or isn’t array, an empty array will be returned. +**Syntax** -Example: +```sql +JSONExtractArrayRaw(json [, indices_or_keys...]) +``` -``` sql +**Parameters** + +- `json` — JSON string to parse. [String](../data-types/string.md). +- `indices_or_keys` — A list of zero or more arguments, each of which can be either string or integer. [String](../data-types/string.md), [Int*](../data-types/int-uint.md). + +`indices_or_keys` type: +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. + +**Returned value** + +- Returns an array with elements of JSON array, each represented as unparsed string. Otherwise, an empty array is returned if the part does not exist or is not an array. [Array](../data-types/array.md)([String](../data-types/string.md)). + +**Example** + +```sql SELECT JSONExtractArrayRaw('{"a": "hello", "b": [-100, 200.0, "hello"]}', 'b') = ['-100', '200.0', '"hello"']; ``` -## JSONExtractKeysAndValuesRaw +### JSONExtractKeysAndValuesRaw Extracts raw data from a JSON object. @@ -590,15 +872,13 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../data-types/string.md) to get the field by the key or an [integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. -- Empty array if the requested object does not exist, or input JSON is invalid. - -Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md)). +- Empty array if the requested object does not exist, or input JSON is invalid. 
[Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md)).

**Examples**

@@ -644,13 +924,30 @@ Result:
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

-## JSON_EXISTS(json, path)
+### JSON_EXISTS
-If the value exists in the JSON document, `1` will be returned.
+If the value exists in the JSON document, `1` will be returned. If the value does not exist, `0` will be returned.
-If the value does not exist, `0` will be returned.
+**Syntax**
-Examples:
+```sql
+JSON_EXISTS(json, path)
+```
+
+**Parameters**
+
+- `json` — A string with valid JSON. [String](../data-types/string.md).
+- `path` — A string representing the path. [String](../data-types/string.md).
+
+:::note
+Before version 21.11 the order of arguments was wrong, i.e. JSON_EXISTS(path, json)
+:::
+
+**Returned value**
+
+- Returns `1` if the value exists in the JSON document, otherwise `0`.
+
+**Examples**

``` sql
SELECT JSON_EXISTS('{"hello":1}', '$.hello');
@@ -659,17 +956,32 @@ SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]');
SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]');
```

+### JSON_QUERY
+
+Parses JSON and extracts a value as a JSON array or JSON object. If the value does not exist, an empty string will be returned.
+
+**Syntax**
+
+```sql
+JSON_QUERY(json, path)
+```
+
+**Parameters**
+
+- `json` — A string with valid JSON. [String](../data-types/string.md).
+- `path` — A string representing the path. [String](../data-types/string.md).
+
 :::note
-Before version 21.11 the order of arguments was wrong, i.e. JSON_EXISTS(path, json)
+Before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, json)
 :::
-## JSON_QUERY(json, path)
+**Returned value**
-Parses a JSON and extract a value as JSON array or JSON object.
+- Returns the extracted value as a JSON array or JSON object. Otherwise it returns an empty string if the value does not exist. [String](../data-types/string.md).
-If the value does not exist, an empty string will be returned.
+**Example**
-Example:
+Query:

``` sql
SELECT JSON_QUERY('{"hello":"world"}', '$.hello');
@@ -686,17 +998,38 @@ Result:

[2] String
```
+
+### JSON_VALUE
+
+Parses JSON and extracts a value as a JSON scalar. If the value does not exist, an empty string will be returned by default.
+
+This function is controlled by the following settings:
+
+- by SET `function_json_value_return_type_allow_nullable` = `true`, `NULL` will be returned. If the value is a complex type (such as a struct, array or map), an empty string will be returned by default.
+- by SET `function_json_value_return_type_allow_complex` = `true`, the complex value will be returned.
+
+**Syntax**
+
+```sql
+JSON_VALUE(json, path)
+```
+
+**Parameters**
+
+- `json` — A string with valid JSON. [String](../data-types/string.md).
+- `path` — A string representing the path. [String](../data-types/string.md).
+
 :::note
-Before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, json)
+Before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, json)
 :::
-## JSON_VALUE(json, path)
+**Returned value**
-Parses a JSON and extract a value as JSON scalar.
+- Returns the extracted value as a JSON scalar if it exists, otherwise an empty string is returned. [String](../data-types/string.md).
-If the value does not exist, an empty string will be returned by default, and by SET `function_json_value_return_type_allow_nullable` = `true`, `NULL` will be returned.
If the value is complex type (such as: struct, array, map), an empty string will be returned by default, and by SET `function_json_value_return_type_allow_complex` = `true`, the complex value will be returned. +**Example** -Example: +Query: ``` sql SELECT JSON_VALUE('{"hello":"world"}', '$.hello'); @@ -716,16 +1049,12 @@ world String ``` -:::note -Before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, json) -::: - -## toJSONString +### toJSONString Serializes a value to its JSON representation. Various data types and nested structures are supported. -64-bit [integers](../../sql-reference/data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. +64-bit [integers](../data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. Special values `NaN` and `inf` are replaced with `null`. Enable [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) setting to show them. -When serializing an [Enum](../../sql-reference/data-types/enum.md) value, the function outputs its name. +When serializing an [Enum](../data-types/enum.md) value, the function outputs its name. **Syntax** @@ -739,14 +1068,12 @@ toJSONString(value) **Returned value** -- JSON representation of the value. - -Type: [String](../../sql-reference/data-types/string.md). +- JSON representation of the value. [String](../data-types/string.md). **Example** -The first example shows serialization of a [Map](../../sql-reference/data-types/map.md). -The second example shows some special values wrapped into a [Tuple](../../sql-reference/data-types/tuple.md). +The first example shows serialization of a [Map](../data-types/map.md). +The second example shows some special values wrapped into a [Tuple](../data-types/tuple.md). Query: @@ -768,7 +1095,7 @@ Result: - [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) -## JSONArrayLength +### JSONArrayLength Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid. @@ -782,13 +1109,11 @@ Alias: `JSON_ARRAY_LENGTH(json)`. **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `json` — [String](../data-types/string.md) with valid JSON. **Returned value** -- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. - -Type: [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). +- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../data-types/int-uint.md). **Example** @@ -803,7 +1128,7 @@ SELECT ``` -## jsonMergePatch +### jsonMergePatch Returns the merged JSON object string which is formed by merging multiple JSON objects. @@ -815,13 +1140,11 @@ jsonMergePatch(json1, json2, ...) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `json` — [String](../data-types/string.md) with valid JSON. **Returned value** -- If JSON object strings are valid, return the merged JSON object string. 
- -Type: [String](../../sql-reference/data-types/string.md). +- If JSON object strings are valid, return the merged JSON object string. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 138b804a575..7222dbeeb0d 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -6,7 +6,7 @@ sidebar_label: Logical # Logical Functions -Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../../sql-reference/data-types/int-uint.md) or in some cases `NULL`. +Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../data-types/int-uint.md) or in some cases `NULL`. Zero as an argument is considered `false`, non-zero values are considered `true`. @@ -26,7 +26,7 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -80,7 +80,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -132,7 +132,7 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne **Arguments** -- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — The value. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -168,7 +168,7 @@ xor(val1, val2...) **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). 
**Returned value** diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 5ebc6191010..12098efc635 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -18,7 +18,7 @@ e() **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## pi @@ -31,7 +31,7 @@ pi() ``` **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## exp @@ -45,11 +45,11 @@ exp(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## log @@ -65,11 +65,11 @@ Alias: `ln(x)` **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp2 @@ -83,11 +83,11 @@ exp2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp2 @@ -111,11 +111,11 @@ log2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp10 @@ -129,11 +129,11 @@ exp10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp10 @@ -157,11 +157,11 @@ log10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). 
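+
+**Example**
+
+As a quick illustration, taking the decimal logarithm of 100 should return 2:
+
+```sql
+SELECT log10(100);
+```
+
+```response
+┌─log10(100)─┐
+│          2 │
+└────────────┘
+```
+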
## sqrt @@ -173,11 +173,11 @@ sqrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## cbrt @@ -189,11 +189,11 @@ cbrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## erf @@ -207,11 +207,11 @@ erf(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -239,11 +239,11 @@ erfc(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## lgamma @@ -257,11 +257,11 @@ lgamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tgamma @@ -275,11 +275,11 @@ gamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## sin @@ -293,11 +293,11 @@ sin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -323,11 +323,11 @@ cos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). 
+- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tan @@ -341,11 +341,11 @@ tan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## asin @@ -359,11 +359,11 @@ asin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## acos @@ -377,11 +377,11 @@ acos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## atan @@ -395,11 +395,11 @@ atan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## pow @@ -415,12 +415,12 @@ Alias: `power(x, y)` **Arguments** -- `x` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) -- `y` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) +- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md) +- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md) **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## cosh @@ -434,13 +434,13 @@ cosh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `1 <= cosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -468,13 +468,13 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. 
Values from the interval: `1 <= x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -502,13 +502,13 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < sinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -536,13 +536,13 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -569,13 +569,13 @@ tanh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-1 < tanh(x) < 1`. -Type: [Float*](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float*](../data-types/float.md#float32-float64). **Example** @@ -601,13 +601,13 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -635,14 +635,14 @@ atan2(y, x) **Arguments** -- `y` — y-coordinate of the point through which the ray passes. 
[(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). -- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle `θ` such that `−π < θ ≤ π`, in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -670,14 +670,14 @@ hypot(x, y) **Arguments** -- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). -- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The length of the hypotenuse of a right-angle triangle. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -705,13 +705,13 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < log1p(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -747,7 +747,7 @@ sign(x) - 0 for `x = 0` - 1 for `x > 0` -Type: [Int8](../../sql-reference/data-types/int-uint.md). +Type: [Int8](../data-types/int-uint.md). **Examples** @@ -792,6 +792,39 @@ Result: │ -1 │ └──────────┘ ``` +## sigmoid + +Returns the [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function). + +**Syntax** + +```sql +sigmoid(x) +``` + +**Parameters** + +- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). + +**Returned value** + +- Corresponding value along the sigmoid curve between 0 and 1. [Float64](../data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT round(sigmoid(x), 5) FROM (SELECT arrayJoin([-1, 0, 1]) AS x); +``` + +Result: + +```result +0.26894 +0.5 +0.73106 +``` ## degrees @@ -805,13 +838,12 @@ degrees(x) **Arguments** -- `x` — Input in radians. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in radians. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Value in degrees. - -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +- Value in degrees. [Float64](../data-types/float.md#float32-float64). **Example** @@ -839,13 +871,13 @@ radians(x) **Arguments** -- `x` — Input in degrees. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in degrees. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Value in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -914,3 +946,49 @@ Result: │ 11 │ └──────────────────────────────────┘ ``` + +## proportionsZTest + +Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`. + +**Syntax** + +```sql +proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type) +``` + +**Arguments** + +- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). +- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). +- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). +- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md). +- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). +- `pool_type`: Selection of pooling (way in which the standard error is estimated). Can be either `unpooled` or `pooled`. [String](../data-types/string.md). + +:::note +For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately. +::: + +**Returned value** + +- `z_stat`: Z statistic. [Float64](../data-types/float.md). +- `p_val`: P value. [Float64](../data-types/float.md). +- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md). +- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); +``` + +Result: + +```response +┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐ +│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 3e0458d226d..4bfa181a35f 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -23,7 +23,7 @@ stem('language', word) ### Arguments - `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). -- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string). 
+- `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string). ### Examples @@ -88,8 +88,8 @@ lemmatize('language', word) ### Arguments -- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string). +- `language` — Language which rules will be applied. [String](../data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). ### Examples @@ -139,8 +139,8 @@ synonyms('extension_name', word) ### Arguments -- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string). +- `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string). +- `word` — Word that will be searched in extension. [String](../data-types/string.md#string). ### Examples @@ -188,7 +188,7 @@ detectLanguage('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -226,7 +226,7 @@ detectLanguageMixed('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -262,7 +262,7 @@ detectLanguageUnknown('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -302,7 +302,7 @@ detectCharset('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 2b0215115cb..31df9e5627d 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -6,11 +6,21 @@ sidebar_label: Other # Other Functions -## hostName() +## hostName Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned. If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +**Syntax** + +```sql +hostName() +``` + +**Returned value** + +- Host name. [String](../data-types/string.md). 
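+
+**Example**
+
+For illustration, the following query simply returns the name of the server it is executed on (the exact value will differ per host):
+
+```sql
+SELECT hostName();
+```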
+ ## getMacro {#getMacro} Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration. @@ -23,13 +33,11 @@ getMacro(name); **Arguments** -- `name` — Macro name to retrieve from the `` section. [String](../../sql-reference/data-types/string.md#string). +- `name` — Macro name to retrieve from the `` section. [String](../data-types/string.md#string). **Returned value** -- Value of the specified macro. - -Type: [String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../data-types/string.md). **Example** @@ -82,9 +90,7 @@ This function is case-insensitive. **Returned value** -- String with the fully qualified domain name. - -Type: `String`. +- String with the fully qualified domain name. [String](../data-types/string.md). **Example** @@ -110,7 +116,7 @@ basename(expr) **Arguments** -- `expr` — A value of type [String](../../sql-reference/data-types/string.md). Backslashes must be escaped. +- `expr` — A value of type [String](../data-types/string.md). Backslashes must be escaped. **Returned Value** @@ -163,34 +169,58 @@ Result: └────────────────┴────────────────────────────┘ ``` -## visibleWidth(x) +## visibleWidth Calculates the approximate width when outputting values to the console in text format (tab-separated). -This function is used by the system to implement Pretty formats. +This function is used by the system to implement [Pretty formats](../../interfaces/formats.md). `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. +**Syntax** + +```sql +visibleWidth(x) +``` + +**Example** + +Query: + ```sql SELECT visibleWidth(NULL) ``` +Result: + ```text ┌─visibleWidth(NULL)─┐ │ 4 │ └────────────────────┘ ``` -## toTypeName(x) +## toTypeName Returns the type name of the passed argument. If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation. -## blockSize() {#blockSize} +**Syntax** + +```sql +toTypeName(x) +``` + +## blockSize {#blockSize} In ClickHouse, queries are processed in blocks (chunks). This function returns the size (row count) of the block the function is called on. +**Syntax** + +```sql +blockSize() +``` + ## byteSize Returns an estimation of uncompressed byte size of its arguments in memory. @@ -207,13 +237,11 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../data-types/int-uint.md). **Examples** -For [String](../../sql-reference/data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). +For [String](../data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). Query: @@ -288,16 +316,28 @@ Result: └────────────────────────────┘ ``` -## materialize(x) +## materialize Turns a constant into a full column containing a single value. Full columns and constants are represented differently in memory. Functions usually execute different code for normal and constant arguments, although the result should typically be the same. This function can be used to debug this behavior. -## ignore(…) +**Syntax** + +```sql +materialize(x) +``` + +## ignore Accepts any arguments, including `NULL` and does nothing. Always returns 0. The argument is internally still evaluated. Useful e.g. for benchmarks. 
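+
+For example, the following illustrative query evaluates all of its arguments, including the subexpressions, but still returns 0:
+
+```sql
+SELECT ignore(2 + 2, 'a', NULL);
+```
+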
+**Syntax** + +```sql +ignore(x) +``` + ## sleep Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes. @@ -310,7 +350,7 @@ sleep(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float](../data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -360,7 +400,7 @@ sleepEachRow(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float*](../data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -392,27 +432,33 @@ The `sleepEachRow()` function is primarily used for testing and debugging purpos Like the [`sleep()` function](#sleep), it's important to use `sleepEachRow()` judiciously and only when necessary, as it can significantly impact the overall performance and responsiveness of your ClickHouse system, especially when dealing with large result sets. -## currentDatabase() +## currentDatabase Returns the name of the current database. Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database. -## currentUser() {#currentUser} +**Syntax** + +```sql +currentDatabase() +``` + +## currentUser {#currentUser} Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned. +**Syntax** + ```sql -SELECT currentUser(); +currentUser() ``` Aliases: `user()`, `USER()`, `current_user()`. Aliases are case insensitive. **Returned values** -- The name of the current user. -- In distributed queries, the login of the user who initiated the query. - -Type: `String`. +- The name of the current user. [String](../data-types/string.md). +- In distributed queries, the login of the user who initiated the query. [String](../data-types/string.md). **Example** @@ -448,10 +494,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. -- `0` if `x` is non-constant. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../data-types/int-uint.md). **Examples** @@ -497,52 +541,6 @@ Result: └────────────────────┘ ``` -## isFinite(x) - -Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. - -## isInfinite(x) - -Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. - -## ifNotFinite - -Checks whether a floating point value is finite. - -**Syntax** - -```sql -ifNotFinite(x,y) -``` - -**Arguments** - -- `x` — Value to check for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. 
Type: [Float\*](../../sql-reference/data-types/float.md). - -**Returned value** - -- `x` if `x` is finite. -- `y` if `x` is not finite. - -**Example** - -Query: - - SELECT 1/0 as infimum, ifNotFinite(infimum,42) - -Result: - - ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ - │ inf │ 42 │ - └─────────┴───────────────────────────────┘ - -You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. - -## isNaN(x) - -Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. - ## hasColumnInTable Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0. @@ -733,11 +731,21 @@ LIMIT 10 └────────────────┴─────────┘ ``` -## formatReadableDecimalSize(x) +## formatReadableDecimalSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string. -Example: +The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull). + +**Syntax** + +```sql +formatReadableDecimalSize(x) +``` + +**Example** + +Query: ```sql SELECT @@ -745,6 +753,8 @@ SELECT formatReadableDecimalSize(filesize_bytes) AS filesize ``` +Result: + ```text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ @@ -754,11 +764,22 @@ SELECT └────────────────┴────────────┘ ``` -## formatReadableSize(x) +## formatReadableSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string. -Example: +The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull). + +**Syntax** + +```sql +formatReadableSize(x) +``` +Alias: `FORMAT_BYTES`. + +**Example** + +Query: ```sql SELECT @@ -766,7 +787,7 @@ SELECT formatReadableSize(filesize_bytes) AS filesize ``` -Alias: `FORMAT_BYTES`. +Result: ```text ┌─filesize_bytes─┬─filesize───┐ @@ -777,11 +798,19 @@ Alias: `FORMAT_BYTES`. └────────────────┴────────────┘ ``` -## formatReadableQuantity(x) +## formatReadableQuantity Given a number, this function returns a rounded number with suffix (thousand, million, billion, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableQuantity(x) +``` + +**Example** + +Query: ```sql SELECT @@ -789,6 +818,8 @@ SELECT formatReadableQuantity(number) AS number_for_humans ``` +Result: + ```text ┌─────────number─┬─number_for_humans─┐ │ 1024 │ 1.02 thousand │ @@ -863,6 +894,122 @@ SELECT └────────────────────┴────────────────────────────────────────────────┘ ``` +## parseReadableSize + +Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. +If the function is unable to parse the input value, it throws an exception. + +The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). + +**Syntax** + +```sql +formatReadableSize(x) +``` + +**Arguments** + +- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)). 
+ +**Returned value** + +- Number of bytes, rounded up to the nearest integer ([UInt64](../../sql-reference/data-types/int-uint.md)). + +**Example** + +```sql +SELECT + arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB']) AS readable_sizes, + parseReadableSize(readable_sizes) AS sizes; +``` + +```text +┌─readable_sizes─┬───sizes─┐ +│ 1 B │ 1 │ +│ 1 KiB │ 1024 │ +│ 3 MB │ 3000000 │ +│ 5.314 KiB │ 5442 │ +└────────────────┴─────────┘ +``` + +## parseReadableSizeOrNull + +Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. +If the function is unable to parse the input value, it returns `NULL`. + +The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). + +**Syntax** + +```sql +parseReadableSizeOrNull(x) +``` + +**Arguments** + +- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)). + +**Returned value** + +- Number of bytes, rounded up to the nearest integer, or NULL if unable to parse the input (Nullable([UInt64](../../sql-reference/data-types/int-uint.md))). + +**Example** + +```sql +SELECT + arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes, + parseReadableSizeOrNull(readable_sizes) AS sizes; +``` + +```text +┌─readable_sizes─┬───sizes─┐ +│ 1 B │ 1 │ +│ 1 KiB │ 1024 │ +│ 3 MB │ 3000000 │ +│ 5.314 KiB │ 5442 │ +│ invalid │ ᴺᵁᴸᴸ │ +└────────────────┴─────────┘ +``` + +## parseReadableSizeOrZero + +Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`. + +The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). + + +**Syntax** + +```sql +parseReadableSizeOrZero(x) +``` + +**Arguments** + +- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)). + +**Returned value** + +- Number of bytes, rounded up to the nearest integer, or 0 if unable to parse the input ([UInt64](../../sql-reference/data-types/int-uint.md)). + +**Example** + +```sql +SELECT + arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes, + parseReadableSizeOrZero(readable_sizes) AS sizes; +``` + +```text +┌─readable_sizes─┬───sizes─┐ +│ 1 B │ 1 │ +│ 1 KiB │ 1024 │ +│ 3 MB │ 3000000 │ +│ 5.314 KiB │ 5442 │ +│ invalid │ 0 │ +└────────────────┴─────────┘ +``` + ## parseTimeDelta Parse a sequence of numbers followed by something resembling a time unit. @@ -903,15 +1050,27 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` -## least(a, b) +## least Returns the smaller value of a and b. -## greatest(a, b) +**Syntax** + +```sql +least(a, b) +``` + +## greatest Returns the larger value of a and b. -## uptime() +**Syntax** + +```sql +greatest(a, b) +``` + +## uptime Returns the server’s uptime in seconds. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. @@ -924,9 +1083,7 @@ uptime() **Returned value** -- Time value of seconds. 
- -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Time value of seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -944,7 +1101,7 @@ Result: └────────┘ ``` -## version() +## version Returns the current version of ClickHouse as a string in the form of: @@ -971,7 +1128,7 @@ None. **Returned value** -Type: [String](../data-types/string) +- Current version of ClickHouse. [String](../data-types/string). **Implementation details** @@ -993,22 +1150,178 @@ SELECT version() └───────────┘ ``` -## buildId() +## buildId Returns the build ID generated by a compiler for the running ClickHouse server binary. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. -## blockNumber() +**Syntax** -Returns the sequence number of the data block where the row is located. +```sql +buildId() +``` -## rowNumberInBlock() {#rowNumberInBlock} +## blockNumber -Returns the ordinal number of the row in the data block. Different data blocks are always recalculated. +Returns a monotonically increasing sequence number of the [block](../../development/architecture.md#block) containing the row. +The returned block number is updated on a best-effort basis, i.e. it may not be fully accurate. -## rowNumberInAllBlocks() +**Syntax** -Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. +```sql +blockNumber() +``` + +**Returned value** + +- Sequence number of the data block where the row is located. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT blockNumber() +FROM +( + SELECT * + FROM system.numbers + LIMIT 10 +) SETTINGS max_block_size = 2 +``` + +Result: + +```response +┌─blockNumber()─┐ +│ 7 │ +│ 7 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 8 │ +│ 8 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 9 │ +│ 9 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 10 │ +│ 10 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 11 │ +│ 11 │ +└───────────────┘ +``` + +## rowNumberInBlock {#rowNumberInBlock} + +Returns for each [block](../../development/architecture.md#block) processed by `rowNumberInBlock` the number of the current row. +The returned number starts for each block at 0. + +**Syntax** + +```sql +rowNumberInBlock() +``` + +**Returned value** + +- Ordinal number of the row in the data block starting from 0. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT rowNumberInBlock() +FROM +( + SELECT * + FROM system.numbers_mt + LIMIT 10 +) SETTINGS max_block_size = 2 +``` + +Result: + +```response +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +``` + +## rowNumberInAllBlocks + +Returns a unique row number for each row processed by `rowNumberInAllBlocks`. The returned numbers start at 0. + +**Syntax** + +```sql +rowNumberInAllBlocks() +``` + +**Returned value** + +- Ordinal number of the row in the data block starting from 0. [UInt64](../data-types/int-uint.md). 
+ +**Example** + +Query: + +```sql +SELECT rowNumberInAllBlocks() +FROM +( + SELECT * + FROM system.numbers_mt + LIMIT 10 +) +SETTINGS max_block_size = 2 +``` + +Result: + +```response +┌─rowNumberInAllBlocks()─┐ +│ 0 │ +│ 1 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 4 │ +│ 5 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 2 │ +│ 3 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 6 │ +│ 7 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 8 │ +│ 9 │ +└────────────────────────┘ +``` ## neighbor @@ -1033,7 +1346,7 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st **Arguments** - `column` — A column name or scalar expression. -- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../../sql-reference/data-types/int-uint.md). +- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../data-types/int-uint.md). - `default_value` — Optional. The returned value if offset is beyond the block boundaries. Type of data blocks affected. **Returned values** @@ -1041,7 +1354,9 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st - Value of `column` with `offset` distance from current row, if `offset` is not outside the block boundaries. - The default value of `column` or `default_value` (if given), if `offset` is outside the block boundaries. -Type: type of data blocks affected or default value type. +:::note +The return type will be that of the data blocks affected or the default value type. +::: **Example** @@ -1128,7 +1443,7 @@ Result: └────────────┴───────┴───────────┴────────────────┘ ``` -## runningDifference(x) {#runningDifference} +## runningDifference {#runningDifference} Calculates the difference between two consecutive row values in the data block. Returns 0 for the first row, and for subsequent rows the difference to the previous row. @@ -1143,7 +1458,15 @@ The result of the function depends on the affected data blocks and the order of The order of rows during calculation of `runningDifference()` can differ from the order of rows returned to the user. To prevent that you can create a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. -Example: +**Syntax** + +```sql +runningDifference(x) +``` + +**Example** + +Query: ```sql SELECT @@ -1162,6 +1485,8 @@ FROM ) ``` +Result: + ```text ┌─EventID─┬───────────EventTime─┬─delta─┐ │ 1106 │ 2016-11-24 00:00:04 │ 0 │ @@ -1174,6 +1499,8 @@ FROM Please note that the block size affects the result. The internal state of `runningDifference` state is reset for each new block. +Query: + ```sql SELECT number, @@ -1182,6 +1509,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1191,6 +1520,8 @@ WHERE diff != 1 └────────┴──────┘ ``` +Query: + ```sql set max_block_size=100000 -- default value is 65536! @@ -1201,6 +1532,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1233,14 +1566,12 @@ runningConcurrency(start, end) **Arguments** -- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. 
[Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `start` — A column with the start time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). **Returned values** -- The number of concurrent events at each event start time. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md) +- The number of concurrent events at each event start time. [UInt32](../data-types/int-uint.md) **Example** @@ -1272,23 +1603,43 @@ Result: └────────────┴────────────────────────────────┘ ``` -## MACNumToString(num) +## MACNumToString Interprets a UInt64 number as a MAC address in big endian format. Returns the corresponding MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form) as string. -## MACStringToNum(s) +**Syntax** + +```sql +MACNumToString(num) +``` + +## MACStringToNum The inverse function of MACNumToString. If the MAC address has an invalid format, it returns 0. -## MACStringToOUI(s) +**Syntax** + +```sql +MACStringToNum(s) +``` + +## MACStringToOUI Given a MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form), returns the first three octets as a UInt64 number. If the MAC address has an invalid format, it returns 0. +**Syntax** + +```sql +MACStringToOUI(s) +``` + ## getSizeOfEnumType -Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). +Returns the number of fields in [Enum](../data-types/enum.md). An exception is thrown if the type is not `Enum`. +**Syntax** + ```sql getSizeOfEnumType(value) ``` @@ -1349,6 +1700,8 @@ Result: Returns the internal name of the data type that represents the value. +**Syntax** + ```sql toColumnTypeName(value) ``` @@ -1427,6 +1780,8 @@ Returns the default value for the given data type. Does not include default values for custom columns set by the user. +**Syntax** + ```sql defaultValueOfArgumentType(expression) ``` @@ -1439,7 +1794,7 @@ defaultValueOfArgumentType(expression) - `0` for numbers. - Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md). **Example** @@ -1489,7 +1844,7 @@ defaultValueOfTypeName(type) - `0` for numbers. - Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md). **Example** @@ -1535,7 +1890,7 @@ SELECT * FROM table WHERE indexHint() **Returned value** -Type: [Uint8](https://clickhouse.com/docs/en/data_types/int_uint/#diapazony-uint). +- `1`. [Uint8](../data-types/int-uint.md). **Example** @@ -1625,29 +1980,31 @@ Result: Creates an array with a single value. -Used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). +:::note +This function is used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). +::: + +**Syntax** ```sql -SELECT replicate(x, arr); +replicate(x, arr) ``` -**Arguments:** +**Arguments** -- `arr` — An array. - `x` — The value to fill the result array with. +- `arr` — An array. [Array](../data-types/array.md). **Returned value** -An array of the lame length as `arr` filled with value `x`. 
-
-Type: `Array`.
+An array of the same length as `arr` filled with value `x`. [Array](../data-types/array.md).

**Example**

Query:

```sql
-SELECT replicate(1, ['a', 'b', 'c'])
+SELECT replicate(1, ['a', 'b', 'c']);
```

Result:

@@ -1658,6 +2015,36 @@ Result:
 └───────────────────────────────┘
 ```

+## revision
+
+Returns the current ClickHouse [server revision](../../operations/system-tables/metrics#revision).
+
+**Syntax**
+
+```sql
+revision()
+```
+
+**Returned value**
+
+- The current ClickHouse server revision. [UInt32](../data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT revision();
+```
+
+Result:
+
+```response
+┌─revision()─┐
+│      54485 │
+└────────────┘
+```
+
 ## filesystemAvailable

 Returns the amount of free space in the filesystem hosting the database persistence. The returned value is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for the operating system.

**Syntax**

```sql
filesystemAvailable()
```

**Returned value**

-- The amount of remaining space available in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- The amount of remaining space available in bytes. [UInt64](../data-types/int-uint.md).

**Example**

@@ -1702,9 +2087,7 @@ filesystemFree()

**Returned value**

-- The amount of free space in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- The amount of free space in bytes. [UInt64](../data-types/int-uint.md).

**Example**

@@ -1734,9 +2117,7 @@ filesystemCapacity()

**Returned value**

-- Capacity of the filesystem in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- Capacity of the filesystem in bytes. [UInt64](../data-types/int-uint.md).

**Example**

@@ -1756,7 +2137,7 @@ Result:

## initializeAggregation

-Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values.
+Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values.

**Syntax**

@@ -1766,7 +2147,7 @@ initializeAggregation (aggregate_function, arg1, arg2, ..., argN)

**Arguments**

-- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md).
+- `aggregate_function` — Name of the aggregation function to initialize. [String](../data-types/string.md).
- `arg` — Arguments of aggregate function.

**Returned value(s)**

@@ -1841,13 +2222,15 @@ finalizeAggregation(state)

**Arguments**

-- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction).
+- `state` — State of aggregation. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction).

**Returned value(s)**

- Value/values that was aggregated. 
-Type: Value of any types that was aggregated. +:::note +The return type is equal to that of any types which were aggregated. +::: **Examples** @@ -1947,8 +2330,8 @@ runningAccumulate(agg_state[, grouping]); **Arguments** -- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). -- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. +- `agg_state` — State of the aggregate function. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../data-types/index.md) for which the equality operator is defined. **Returned value** @@ -2100,7 +2483,7 @@ Result: └──────────────────────────────────────────────────┘ ``` -## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) +## catboostEvaluate :::note This function is not available in ClickHouse Cloud. @@ -2109,6 +2492,14 @@ This function is not available in ClickHouse Cloud. Evaluate an external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning. Accepts a path to a catboost model and model arguments (features). Returns Float64. +**Syntax** + +```sql +catboostEvaluate(path_to_model, feature_1, feature_2, ..., feature_n) +``` + +**Example** + ```sql SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction FROM data_table @@ -2145,10 +2536,16 @@ communicate using a HTTP interface. By default, port `9012` is used. A different See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set. -## throwIf(x\[, message\[, error_code\]\]) +## throwIf Throw an exception if argument `x` is true. +**Syntax** + +```sql +throwIf(x[, message[, error_code]]) +``` + **Arguments** - `x` - the condition to check. @@ -2208,7 +2605,7 @@ getSetting('custom_setting'); **Parameter** -- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). +- `custom_setting` — The setting name. [String](../data-types/string.md). **Returned value** @@ -2233,7 +2630,7 @@ Result: ## isDecimalOverflow -Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is outside its precision or outside the specified precision. +Checks whether the [Decimal](../data-types/decimal.md) value is outside its precision or outside the specified precision. **Syntax** @@ -2243,8 +2640,8 @@ isDecimalOverflow(d, [p]) **Arguments** -- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `d` — value. [Decimal](../data-types/decimal.md). +- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../data-types/int-uint.md#uint-ranges). 
**Returned values** @@ -2280,13 +2677,11 @@ countDigits(x) **Arguments** -- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. +- `x` — [Int](../data-types/int-uint.md) or [Decimal](../data-types/decimal.md) value. **Returned value** -Number of digits. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges). :::note For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). @@ -2310,9 +2705,7 @@ Result: ## errorCodeToName -Returns the textual name of an error code. - -Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). +- The textual name of an error code. [LowCardinality(String)](../data-types/lowcardinality.md). **Syntax** @@ -2343,9 +2736,7 @@ tcpPort() **Returned value** -- The TCP port number. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The TCP port number. [UInt16](../data-types/int-uint.md). **Example** @@ -2381,9 +2772,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the current user settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledProfiles @@ -2397,9 +2786,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultProfiles @@ -2413,9 +2800,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## currentRoles @@ -2429,9 +2814,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledRoles @@ -2445,9 +2828,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultRoles @@ -2461,9 +2842,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## getServerPort @@ -2477,7 +2856,7 @@ getServerPort(port_name) **Arguments** -- `port_name` — The name of the server port. 
[String](../../sql-reference/data-types/string.md#string). Possible values: +- `port_name` — The name of the server port. [String](../data-types/string.md#string). Possible values: - 'tcp_port' - 'tcp_port_secure' @@ -2492,9 +2871,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../data-types/int-uint.md). **Example** @@ -2526,9 +2903,7 @@ queryID() **Returned value** -- The ID of the current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../data-types/string.md) **Example** @@ -2562,9 +2937,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. [String](../data-types/string.md) **Example** @@ -2597,9 +2970,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../data-types/int-uint.md). **Example** @@ -2639,9 +3010,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../data-types/int-uint.md). **See Also** @@ -2663,9 +3032,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. - -Type: [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../data-types/string.md). **Example** @@ -2699,9 +3066,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -2738,9 +3103,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. - -Type: [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../data-types/string.md). **Examples** @@ -2807,9 +3170,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema - -Type: [String](../../sql-reference/data-types/string.md). +- CapnProto schema. [String](../data-types/string.md). **Examples** @@ -2908,9 +3269,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema - -Type: [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../data-types/string.md). **Examples** @@ -2990,11 +3349,11 @@ formatQueryOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). **Example** @@ -3029,11 +3388,11 @@ formatQuerySingleLineOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). 
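Like `formatQueryOrNull`, the `OrNull` variant returns `NULL` instead of throwing an exception when the input is not well-formed SQL, so it can be applied to arbitrary strings. A small illustrative sketch (identifiers are arbitrary; the second value is expected to be `NULL`):

```sql
SELECT
    formatQuerySingleLineOrNull('select a, b FRom tab WHERE a > 3') AS ok,   -- parsed and reformatted on one line
    formatQuerySingleLineOrNull('this is not SQL') AS not_ok;                -- not parseable, so NULL
```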
**Example** @@ -3061,8 +3420,8 @@ variantElement(variant, type_name, [, default_value]) **Arguments** -- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). -- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). +- `type_name` — The name of the variant type to extract. [String](../data-types/string.md). - `default_value` - The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional. **Returned value** @@ -3098,7 +3457,7 @@ variantType(variant) **Arguments** -- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). **Returned value** @@ -3301,3 +3660,31 @@ The setting is not enabled by default for security reasons, because some headers HTTP headers are case sensitive for this function. If the function is used in the context of a distributed query, it returns non-empty result only on the initiator node. + +## showCertificate + +Shows information about the current server's Secure Sockets Layer (SSL) certificate if it has been configured. See [Configuring SSL-TLS](https://clickhouse.com/docs/en/guides/sre/configuring-ssl) for more information on how to configure ClickHouse to use OpenSSL certificates to validate connections. + +**Syntax** + +```sql +showCertificate() +``` + +**Returned value** + +- Map of key-value pairs relating to the configured SSL certificate. [Map](../data-types/map.md)([String](../data-types/string.md), [String](../data-types/string.md)). + +**Example** + +Query: + +```sql +SELECT showCertificate() FORMAT LineAsString; +``` + +Result: + +```response +{'version':'1','serial_number':'2D9071D64530052D48308473922C7ADAFA85D6C5','signature_algo':'sha256WithRSAEncryption','issuer':'/CN=marsnet.local CA','not_before':'May 7 17:01:21 2024 GMT','not_after':'May 7 17:01:21 2025 GMT','subject':'/CN=chnode1','pkey_algo':'rsaEncryption'} +``` diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 2d7752ed022..a9b483aa0e5 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -169,7 +169,7 @@ randUniform(min, max) ### Returned value -A random number of type [Float64](/docs/en/sql-reference/data-types/float.md). +A random number of type [Float64](../data-types/float.md). ### Example @@ -204,9 +204,7 @@ randNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -243,9 +241,7 @@ randLogNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -282,9 +278,7 @@ randBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -321,9 +315,7 @@ randNegativeBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -359,9 +351,7 @@ randPoisson(n) **Returned value** -- Random number. 
- -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -397,9 +387,7 @@ randBernoulli(probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -435,9 +423,7 @@ randExponential(lambda) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -473,9 +459,7 @@ randChiSquared(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -511,9 +495,7 @@ randStudentT(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -550,9 +532,7 @@ randFisherF(d1, d2) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -588,9 +568,7 @@ randomString(length) **Returned value** -- String filled with random bytes. - -Type: [String](../../sql-reference/data-types/string.md). +- String filled with random bytes. [String](../data-types/string.md). **Example** @@ -626,13 +604,11 @@ randomFixedString(length); **Arguments** -- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — String length in bytes. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- String filled with random bytes. - -Type: [FixedString](../../sql-reference/data-types/fixedstring.md). +- String filled with random bytes. [FixedString](../data-types/fixedstring.md). **Example** @@ -667,9 +643,7 @@ randomPrintableASCII(length) **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. - -Type: [String](../../sql-reference/data-types/string.md) +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../data-types/string.md) **Example** @@ -697,13 +671,11 @@ randomStringUTF8(length); **Arguments** -- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — Length of the string in code points. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- UTF-8 random string. - -Type: [String](../../sql-reference/data-types/string.md). +- UTF-8 random string. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index afec43cd6f4..d18185c5013 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -36,8 +36,8 @@ Alias: `truncate`. **Parameters** -- `input`: A numeric type ([Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md) or [Integer](/docs/en/sql-reference/data-types/int-uint.md)). -- `precision`: An [Integer](/docs/en/sql-reference/data-types/int-uint.md) type. +- `input`: A numeric type ([Float](../data-types/float.md), [Decimal](../data-types/decimal.md) or [Integer](../data-types/int-uint.md)). 
+- `precision`: An [Integer](../data-types/int-uint.md) type. **Returned value** @@ -69,7 +69,7 @@ round(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — An integer value. - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. @@ -171,7 +171,7 @@ roundBankers(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 8e50637cf30..20d63d84628 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -19,20 +19,20 @@ splitByChar(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator which should contain exactly one character. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). + + Empty substrings may be selected when: - A separator occurs at the beginning or end of the string; - There are multiple consecutive separators; - The original string `s` is empty. -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - :::note The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings > 0` meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list. For example, @@ -70,21 +70,23 @@ splitByString(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator. [String](../../sql-reference/data-types/string.md). 
-- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Empty substrings may be selected when: - A non-empty separator occurs at the beginning or end of the string; - There are multiple consecutive non-empty separators; - The original string `s` is empty while the separator is not empty. +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -125,21 +127,24 @@ splitByRegexp(regexp, s[, max_substrings])) **Arguments** - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). + + +Empty substrings may be selected when: - A non-empty regular expression match occurs at the beginning or end of the string; - There are multiple consecutive non-empty regular expression matches; - The original string `s` is empty while the regular expression is not empty. -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -180,17 +185,17 @@ splitByWhitespace(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). 
+ +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -219,17 +224,17 @@ splitByNonAlpha(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -282,16 +287,16 @@ Alias: `splitByAlpha` **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -322,11 +327,7 @@ extractAllGroups(text, regexp) **Returned values** -- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). - -- If there is no matching group, returns an empty array. - -Type: [Array](../data-types/array.md). +- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). If there is no matching group, it returns an empty array. [Array](../data-types/array.md). **Example** @@ -354,14 +355,12 @@ ngrams(string, ngramsize) **Arguments** -- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `ngramsize` — The size of an n-gram. [UInt](../data-types/int-uint.md). **Returned values** -- Array with n-grams. 
- -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- Array with n-grams. [Array](../data-types/array.md)([String](../data-types/string.md)). **Example** @@ -383,13 +382,11 @@ Splits a string into tokens using non-alphanumeric ASCII characters as separator **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. +- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object. **Returned value** -- The resulting array of tokens from input string. - -Type: [Array](../data-types/array.md). +- The resulting array of tokens from input string. [Array](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index e6703b573cb..342ca2b9f03 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -30,9 +30,7 @@ empty(x) **Returned value** -- Returns `1` for an empty string or `0` for a non-empty string. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty string or `0` for a non-empty string. [UInt8](../data-types/int-uint.md). **Example** @@ -68,9 +66,7 @@ notEmpty(x) **Returned value** -- Returns `1` for a non-empty string or `0` for an empty string string. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty string or `0` for an empty string string. [UInt8](../data-types/int-uint.md). **Example** @@ -187,7 +183,7 @@ left(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -234,7 +230,7 @@ leftUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -289,9 +285,7 @@ Alias: `LPAD` **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -325,9 +319,7 @@ leftPadUTF8(string, length[, pad_string]) **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -355,7 +347,7 @@ right(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). 
**Returned value**

@@ -402,7 +394,7 @@ rightUTF8(s, offset)

**Parameters**

-- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).

**Returned value**

@@ -457,9 +449,7 @@ Alias: `RPAD`

**Returned value**

-- A left-padded string of the given length.
-
-Type: [String](../data-types/string.md).
+- A right-padded string of the given length. [String](../data-types/string.md).

**Example**

@@ -493,9 +483,7 @@ rightPadUTF8(string, length[, pad_string])

**Returned value**

-- A right-padded string of the given length.
-
-Type: [String](../data-types/string.md).
+- A right-padded string of the given length. [String](../data-types/string.md).

**Example**

@@ -525,11 +513,11 @@ Alias: `lcase`

**Parameters**

-- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
+- `input`: A string type [String](../data-types/string.md).

**Returned value**

-- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
+- A [String](../data-types/string.md) data type value.

**Example**

@@ -559,11 +547,11 @@ Alias: `ucase`

**Parameters**

-- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
+- `input`: A string type [String](../data-types/string.md).

**Returned value**

-- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
+- A [String](../data-types/string.md) data type value.

**Examples**

@@ -603,11 +591,11 @@ upperUTF8(input)

**Parameters**

-- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
+- `input`: A string type [String](../data-types/string.md).

**Returned value**

-- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
+- A [String](../data-types/string.md) data type value.

**Example**

@@ -639,7 +627,7 @@ toValidUTF8(input_string)

**Arguments**

-- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
+- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object.

**Returned value**

@@ -671,14 +659,12 @@ Alias: `REPEAT`

**Arguments**

-- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md).
-- `n` — The number of times to repeat the string. [UInt* or Int*](../../sql-reference/data-types/int-uint.md).
+- `s` — The string to repeat. [String](../data-types/string.md).
+- `n` — The number of times to repeat the string. [UInt* or Int*](../data-types/int-uint.md).

**Returned value**

-A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string.
-
-Type: `String`.
+A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. [String](../data-types/string.md).

**Example**

@@ -708,13 +694,11 @@ Alias: `SPACE`.

**Arguments**

-- `n` — The number of times to repeat the space. [UInt* or Int*](../../sql-reference/data-types/int-uint.md).
+- `n` — The number of times to repeat the space. [UInt* or Int*](../data-types/int-uint.md).

**Returned value**

-The string containing string ` ` repeated `n` times. If `n` <= 0, the function returns the empty string.
-
-Type: `String`.
+The string containing string ` ` repeated `n` times. 
If `n` <= 0, the function returns the empty string. [String](../data-types/string.md). **Example** @@ -754,7 +738,7 @@ concat(s1, s2, ...) At least one value of arbitrary type. -Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -861,8 +845,8 @@ Alias: `concat_ws` **Arguments** -- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +- sep — separator. Const [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- exprN — expression to be concatenated. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -907,15 +891,13 @@ Alias: **Arguments** -- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset` — The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** -A substring of `s` with `length` many bytes, starting at index `offset`. - -Type: `String`. +A substring of `s` with `length` many bytes, starting at index `offset`. [String](../data-types/string.md). **Example** @@ -945,9 +927,9 @@ substringUTF8(s, offset[, length]) **Arguments** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s`: The string to calculate a substring from. 
[String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -983,8 +965,8 @@ Alias: `SUBSTRING_INDEX` **Arguments** -- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- delim: The character to split. [String](../../sql-reference/data-types/string.md). +- s: The string to extract substring from. [String](../data-types/string.md). +- delim: The character to split. [String](../data-types/string.md). - count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Example** @@ -1014,13 +996,13 @@ substringIndexUTF8(s, delim, count) **Arguments** -- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- `delim`: The character to split. [String](../../sql-reference/data-types/string.md). +- `s`: The string to extract substring from. [String](../data-types/string.md). +- `delim`: The character to split. [String](../data-types/string.md). - `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Returned value** -A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`. +A substring [String](../data-types/string.md) of `s` before `count` occurrences of `delim`. **Implementation details** @@ -1058,7 +1040,7 @@ convertCharset(s, from, to) ## base58Encode -Encodes a String using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) in the "Bitcoin" alphabet. +Encodes a String using [Base58](https://datatracker.ietf.org/doc/html/draft-msporny-base58) in the "Bitcoin" alphabet. **Syntax** @@ -1068,13 +1050,11 @@ base58Encode(plaintext) **Arguments** -- `plaintext` — [String](../../sql-reference/data-types/string.md) column or constant. +- `plaintext` — [String](../data-types/string.md) column or constant. **Returned value** -- A string containing the encoded value of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string containing the encoded value of the argument. [String](../data-types/string.md). **Example** @@ -1092,7 +1072,7 @@ Result: ## base58Decode -Accepts a String and decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet. +Accepts a String and decodes it using [Base58](https://datatracker.ietf.org/doc/html/draft-msporny-base58) encoding scheme using "Bitcoin" alphabet. **Syntax** @@ -1102,13 +1082,11 @@ base58Decode(encoded) **Arguments** -- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. +- `encoded` — [String](../data-types/string.md) column or constant. 
If the string is not a valid Base58-encoded value, an exception is thrown.

**Returned value**

-- A string containing the decoded value of the argument.
-
-Type: [String](../../sql-reference/data-types/string.md).
+- A string containing the decoded value of the argument. [String](../data-types/string.md).

**Example**

@@ -1136,7 +1114,7 @@ tryBase58Decode(encoded)

**Parameters**

-- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error.
+- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error.

**Returned value**

@@ -1180,7 +1158,7 @@ tryBase64Decode(encoded)

**Parameters**

-- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error.
+- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string in case of error.

**Examples**

@@ -1279,14 +1257,12 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string)

**Arguments**

-- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md).
-- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md).
+- `trim_character` — Specified characters for trim. [String](../data-types/string.md).
+- `input_string` — String for trim. [String](../data-types/string.md).

**Returned value**

-A string without leading and/or trailing specified characters.
-
-Type: `String`.
+A string without leading and/or trailing specified characters. [String](../data-types/string.md).

**Example**

@@ -1316,13 +1292,11 @@ Alias: `ltrim(input_string)`.

**Arguments**

-- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
+- `input_string` — string to trim. [String](../data-types/string.md).

**Returned value**

-A string without leading common whitespaces.
-
-Type: `String`.
+A string without leading common whitespaces. [String](../data-types/string.md).

**Example**

@@ -1352,13 +1326,11 @@ Alias: `rtrim(input_string)`.

**Arguments**

-- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
+- `input_string` — string to trim. [String](../data-types/string.md).

**Returned value**

-A string without trailing common whitespaces.
-
-Type: `String`.
+A string without trailing common whitespaces. [String](../data-types/string.md).

**Example**

@@ -1388,13 +1360,11 @@ Alias: `trim(input_string)`.

**Arguments**

-- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
+- `input_string` — string to trim. [String](../data-types/string.md).

**Returned value**

-A string without leading and trailing common whitespaces.
-
-Type: `String`.
+A string without leading and trailing common whitespaces. [String](../data-types/string.md).

**Example**

@@ -1440,13 +1410,11 @@ normalizeQuery(x)

**Arguments**

-- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
+- `x` — Sequence of characters. [String](../data-types/string.md).

**Returned value**

-- Sequence of characters with placeholders.
-
-Type: [String](../../sql-reference/data-types/string.md).
+- Sequence of characters with placeholders. [String](../data-types/string.md). 
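In other words, literal values and lists of literals in the query text are collapsed into `?`-style placeholders, so queries that differ only in their constants normalize to the same string. A minimal illustrative query (identifiers are arbitrary):

```sql
-- constants such as 123 and 'abc' are replaced by placeholders in the result
SELECT normalizeQuery('SELECT id FROM tbl WHERE x = 123 AND name = ''abc''') AS normalized;
```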
**Example** @@ -1474,13 +1442,11 @@ normalizedQueryHash(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../data-types/string.md). **Returned value** -- Hash value. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges). **Example** @@ -1508,13 +1474,11 @@ normalizeUTF8NFC(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFC normalization form. [String](../data-types/string.md). **Example** @@ -1542,13 +1506,11 @@ normalizeUTF8NFD(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFD normalization form. [String](../data-types/string.md). **Example** @@ -1576,13 +1538,11 @@ normalizeUTF8NFKC(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKC normalization form. [String](../data-types/string.md). **Example** @@ -1610,13 +1570,11 @@ normalizeUTF8NFKD(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKD normalization form. [String](../data-types/string.md). **Example** @@ -1647,13 +1605,11 @@ encodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The escaped string. [String](../data-types/string.md). **Example** @@ -1687,13 +1643,11 @@ decodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1723,13 +1677,11 @@ decodeHTMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1778,13 +1730,11 @@ extractTextFromHTML(x) **Arguments** -- `x` — input text. [String](../../sql-reference/data-types/string.md). +- `x` — input text. [String](../data-types/string.md). 
**Returned value** -- Extracted text. - -Type: [String](../../sql-reference/data-types/string.md). +- Extracted text. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 0b761b62006..7aeb1f5b2a7 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -139,7 +139,7 @@ Format the `pattern` string with the values (strings, integers, etc.) listed in **Syntax** ```sql -format(pattern, s0, s1, …) +format(pattern, s0, s1, ...) ``` **Example** @@ -202,13 +202,13 @@ translateUTF8(s, from, to) **Parameters** -- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `s`: A string type [String](../data-types/string.md). +- `from`: A string type [String](../data-types/string.md). +- `to`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Examples** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 9738c19bf3c..d261cff3580 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -17,7 +17,7 @@ Functions in this section also assume that the searched string (referred to in t violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the results are undefined. Note that no automatic Unicode normalization is performed, however you can use the -[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. +[normalizeUTF8*()](string-functions.md) functions for that. [General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately. @@ -38,12 +38,12 @@ Alias: - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -- Starting position in bytes and counting from 1, if the substring was found. -- 0, if the substring was not found. +- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../data-types/int-uint.md). +- 0, if the substring was not found. [UInt64](../data-types/int-uint.md).
If substring `needle` is empty, these rules apply: - if no `start_pos` was specified: return `1` @@ -53,8 +53,6 @@ If substring `needle` is empty, these rules apply: The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`. -Type: `Integer`. - **Examples** Query: @@ -206,9 +204,9 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN]) **Arguments** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). -**Returned values** +**Returned value** - Array of the starting position in bytes and counting from 1, if the substring was found. - 0, if the substring was not found. @@ -241,7 +239,7 @@ multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -275,7 +273,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -311,7 +309,7 @@ multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., nee **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -349,7 +347,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -383,7 +381,7 @@ multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needle **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -417,7 +415,7 @@ multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). 
**Returned value** @@ -453,7 +451,7 @@ multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., ne **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -490,12 +488,11 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -524,12 +521,11 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -558,12 +554,11 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle, Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -594,12 +589,11 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -632,7 +626,7 @@ multiSearchAny(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). 
**Returned value** @@ -666,7 +660,7 @@ multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -700,7 +694,7 @@ multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -736,7 +730,7 @@ multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -799,7 +793,7 @@ If you only want to search multiple substrings in a string, you can use function **Syntax** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -809,7 +803,7 @@ Like `multiMatchAny` but returns any index that matches the haystack. **Syntax** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -819,7 +813,7 @@ Like `multiMatchAny` but returns the array of all indices that match the haystac **Syntax** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -833,7 +827,7 @@ Like `multiMatchAny` but returns 1 if any pattern matches the haystack within a **Syntax** ```sql -multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAnyIndex @@ -843,7 +837,7 @@ Like `multiFuzzyMatchAny` but returns any index that matches the haystack within **Syntax** ```sql -multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAllIndices @@ -853,7 +847,7 @@ Like `multiFuzzyMatchAny` but returns the array of all indices in any order that **Syntax** ```sql -multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## extract @@ -896,14 +890,16 @@ extractAllGroupsHorizontal(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../data-types/string.md). 
+- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an array of empty arrays is returned. +::: **Example** @@ -931,14 +927,16 @@ extractAllGroupsVertical(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an empty array is returned. +::: **Example** @@ -968,7 +966,7 @@ Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which i If the haystack or the LIKE expression are not valid UTF-8, the behavior is undefined. -No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. +No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](string-functions.md) functions for that. To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`. @@ -1005,7 +1003,7 @@ Alias: `haystack NOT ILIKE pattern` (operator) ## ngramDistance -Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other. +Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other. Functions [`ngramDistanceCaseInsensitive`](#ngramdistancecaseinsensitive), [`ngramDistanceUTF8`](#ngramdistanceutf8), [`ngramDistanceCaseInsensitiveUTF8`](#ngramdistancecaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. @@ -1022,7 +1020,7 @@ ngramDistance(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings.
[Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Implementation details** @@ -1076,7 +1074,7 @@ ngramDistanceCaseInsensitive(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Examples** @@ -1125,7 +1123,7 @@ ngramDistanceUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Example** @@ -1158,7 +1156,7 @@ ngramDistanceCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Example** @@ -1176,7 +1174,7 @@ Result: ## ngramSearch -Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). +Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). Functions [`ngramSearchCaseInsensitive`](#ngramsearchcaseinsensitive), [`ngramSearchUTF8`](#ngramsearchutf8), [`ngramSearchCaseInsensitiveUTF8`](#ngramsearchcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. @@ -1193,7 +1191,7 @@ ngramSearch(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) **Implementation details** @@ -1232,7 +1230,7 @@ ngramSearchCaseInsensitive(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. 
[Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1267,7 +1265,7 @@ ngramSearchUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1302,7 +1300,7 @@ ngramSearchCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1336,13 +1334,11 @@ countSubstrings(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1385,13 +1381,11 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1439,13 +1433,11 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1492,13 +1484,11 @@ countMatches(haystack, pattern) **Arguments** - `haystack` — The string to search in. 
[String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../data-types/int-uint.md). **Examples** @@ -1539,13 +1529,11 @@ countMatchesCaseInsensitive(haystack, pattern) **Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../data-types/int-uint.md). **Examples** @@ -1579,13 +1567,11 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`. - `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional. +- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** -`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. - -Type: `String`. +`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. [String](../data-types/string.md). **Examples** @@ -1622,12 +1608,9 @@ hasSubsequence(haystack, needle) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1660,12 +1643,9 @@ hasSubsequenceCaseInsensitive(haystack, needle) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0 otherwise [UInt8](../data-types/int-uint.md). **Examples** @@ -1698,12 +1678,9 @@ hasSubsequenceUTF8(haystack, needle) - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. 
UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). Query: @@ -1736,12 +1713,9 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle) - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1776,8 +1750,7 @@ hasToken(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if the token is not present. +- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Implementation details** @@ -1812,9 +1785,7 @@ hasTokenOrNull(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if the token is not present in the haystack. -- null, if the token is ill-formed. +- 1, if the token is present in the haystack, 0 if it is not present, and null if the token is ill-formed. **Implementation details** @@ -1851,8 +1822,7 @@ hasTokenCaseInsensitive(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, otherwise. +- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Implementation details** @@ -1887,9 +1857,7 @@ hasTokenCaseInsensitiveOrNull(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if token is not present. -- null, if the token is ill-formed. +- 1, if the token is present in the haystack, 0 if it is not present, and [`null`](../data-types/nullable.md) if the token is ill-formed. **Implementation details** diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index e80a3fa9860..ce5dea14ec5 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -30,9 +30,7 @@ At least four data points are required in `series` to detect outliers. **Returned value** -- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. - -Type: [Array](../../sql-reference/data-types/array.md). +- Returns an array of the same length as the input array where each value represents the anomaly score of the corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../data-types/array.md). **Examples** @@ -81,10 +79,7 @@ seriesPeriodDetectFFT(series); **Returned value** -- A real value equal to the period of series data -- Returns NAN when number of data points are less than four. - -Type: [Float64](../../sql-reference/data-types/float.md). +- A real value equal to the period of the series data. Returns NaN if the number of data points is less than four. [Float64](../data-types/float.md).
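As a minimal sketch of the behavior described above (the input series is made up; since it repeats the pattern [1, 4, 6], the expected detected period is 3):

```sql
-- The series repeats every 3 elements, so the function should report a period of 3.
SELECT seriesPeriodDetectFFT([1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6]) AS period;
```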
**Examples** @@ -134,9 +129,7 @@ The number of data points in `series` should be at least twice the value of `per **Returned value** - An array of four arrays where the first array include seasonal components, the second array - trend, -the third array - residue component, and the fourth array - baseline(seasonal + trend) component. - -Type: [Array](../../sql-reference/data-types/array.md). +the third array - residue component, and the fourth array - baseline (seasonal + trend) component. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index d8f23c92e61..2cec1987c20 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -17,15 +17,13 @@ tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. +- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. - -Type: `Tuple(DateTime, DateTime)` +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). **Example** @@ -53,16 +51,14 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. +- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. +- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. - -Type: `Tuple(DateTime, DateTime)` +- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).
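As an illustrative sketch of the hopping-window bounds described above (the timestamp and intervals are arbitrary; the exact bounds are shown in the example that follows):

```sql
-- A 10-second window hopping every 2 seconds; without WINDOW VIEW the function
-- returns the bounds of the first window that contains the given timestamp.
SELECT hop(toDateTime('2024-01-01 00:00:05'), toIntervalSecond(2), toIntervalSecond(10)) AS window_bounds;
```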
**Example** diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 64b1732597f..0663be08240 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,15 +7,15 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function. +For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. -The function implements the operator `(x, y, …)`. +The function implements the operator `(x, y, ...)`. **Syntax** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement @@ -35,7 +35,7 @@ tupleElement(tuple, name, [, default_value]) ## untuple -Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location. +Performs syntactic substitution of [tuple](../data-types/tuple.md#tuplet1-t2) elements in the call location. The names of the result columns are implementation-specific and subject to change. Do not assume specific column names after `untuple`. @@ -49,7 +49,7 @@ You can use the `EXCEPT` expression to skip columns as a result of the query. **Arguments** -- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). +- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../data-types/tuple.md). **Returned value** @@ -111,7 +111,7 @@ Result: **See Also** -- [Tuple](../../sql-reference/data-types/tuple.md) +- [Tuple](../data-types/tuple.md) ## tupleHammingDistance @@ -125,8 +125,8 @@ tupleHammingDistance(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). Tuples should have the same type of the elements. @@ -134,7 +134,9 @@ Tuples should have the same type of the elements. - The Hamming distance. -Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. +:::note +The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. +::: ``` sql SELECT @@ -196,13 +198,11 @@ tupleToNameValuePairs(tuple) **Arguments** -- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. +- `tuple` — Named tuple. [Tuple](../data-types/tuple.md) with any types of values. **Returned value** -- An array with (name, value) pairs. - -Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). +- An array with (name, value) pairs. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), ...)). **Example** @@ -273,14 +273,12 @@ Alias: `vectorSum`. 
**Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the sum. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the sum. [Tuple](../data-types/tuple.md). **Example** @@ -312,14 +310,12 @@ Alias: `vectorDifference`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of subtraction. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of subtraction. [Tuple](../data-types/tuple.md). **Example** @@ -349,14 +345,12 @@ tupleMultiply(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the multiplication. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the multiplication. [Tuple](../data-types/tuple.md). **Example** @@ -386,14 +380,12 @@ tupleDivide(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of division. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of division. [Tuple](../data-types/tuple.md). **Example** @@ -423,13 +415,11 @@ tupleNegate(tuple) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of negation. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of negation. [Tuple](../data-types/tuple.md). **Example** @@ -459,14 +449,12 @@ tupleMultiplyByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Multiplier. [Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with multiplied values. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with multiplied values. [Tuple](../data-types/tuple.md). **Example** @@ -496,14 +484,12 @@ tupleDivideByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Divider. 
[Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with divided values. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with divided values. [Tuple](../data-types/tuple.md). **Example** @@ -531,7 +517,7 @@ tupleConcat(tuples) **Arguments** -- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. +- `tuples` – Arbitrary number of arguments of [Tuple](../data-types/tuple.md) type. **Example** diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 8283de95994..ad40725d680 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -6,7 +6,7 @@ sidebar_label: Maps ## map -Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type. +Creates a value of type [Map(key, value)](../data-types/map.md) from key-value pairs. **Syntax** @@ -16,14 +16,12 @@ map(key1, value1[, key2, value2, ...]) **Arguments** -- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). +- `key_n` — The keys of the map entries. Any type supported as key type of [Map](../data-types/map.md). +- `value_n` — The values of the map entries. Any type supported as value type of [Map](../data-types/map.md). **Returned value** -- Data structure as `key:value` pairs. - -Type: [Map(key, value)](../../sql-reference/data-types/map.md). +- A map containing `key:value` pairs. [Map(key, value)](../data-types/map.md). **Examples** @@ -43,35 +41,16 @@ Result: └──────────────────────────────────────────────────┘ ``` -Query: - -```sql -CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; -INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); -SELECT a['key2'] FROM table_map; -``` - -Result: - -```text -┌─arrayElement(a, 'key2')─┐ -│ 0 │ -│ 2 │ -│ 4 │ -└─────────────────────────┘ -``` - -**See Also** - -- [Map(key, value)](../../sql-reference/data-types/map.md) data type - ## mapFromArrays -Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Notice that the second argument could also be a [Map](../../sql-reference/data-types/map.md), thus it is casted to an Array when executing. +Creates a map from an array of keys and an array of values. +The function is a convenient alternative to syntax `CAST([...], 'Map(key_type, value_type)')`. 
+For example, instead of writing +- `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, or +- `CAST([('aa',4), ('bb',5)], 'Map(String, UInt32)')` -The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. - +you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. **Syntax** @@ -83,12 +62,12 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** -- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md) -- `values` - Given value array or map to create a map from. +- `keys` — Array of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type. +- `values` - Array or map of values to create the map from. [Array](../data-types/array.md) or [Map](../data-types/map.md). **Returned value** -- A map whose keys and values are constructed from the key array and value array/map. +- A map with keys and values constructed from the key array and value array/map. **Example** @@ -96,14 +75,25 @@ Query: ```sql select mapFromArrays(['a', 'b', 'c'], [1, 2, 3]) +``` +Result: +``` ┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐ │ {'a':1,'b':2,'c':3} │ └───────────────────────────────────────────┘ +``` +`mapFromArrays` also accepts arguments of type [Map](../data-types/map.md). These are casted to array of tuples during execution. + +```sql SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3)) +``` +Result: + +``` ┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐ │ {1:('a',1),2:('b',2),3:('c',3)} │ └───────────────────────────────────────────────────────┘ @@ -111,9 +101,11 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3)) ## extractKeyValuePairs -Extracts key-value pairs, i.e. a [Map(String, String)](../../sql-reference/data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files). - -A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported. +Converts a string of key-value pairs to a [Map(String, String)](../data-types/map.md). +Parsing is tolerant towards noise (e.g. log files). +Key-value pairs in the input string consist of a key, followed by a key-value delimiter, and a value. +Key value pairs are separated by a pair delimiter. +Keys and values can be quoted. **Syntax** @@ -127,18 +119,18 @@ Alias: **Arguments** -- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). 
-- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `data` - String to extract key-value pairs from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `key_value_delimiter` - Single character delimiting keys and values. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `pair_delimiters` - Set of characters delimiting pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `quoting_character` - Single character used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned values** -- A [Map(String, String)](../../sql-reference/data-types/map.md) of key-value pairs. +- A [Map(String, String)](../data-types/map.md) of key-value pairs. **Examples** -Simple case: +Query: ``` sql SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv @@ -152,7 +144,7 @@ Result: ``` ┌─kv──────────────────────────────────────────────────────────────────────┐ │ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │ └─────────────────────────────────────────────────────────────────────────┘ ``` -Single quote as quoting character: +With a single quote `'` as quoting character: ``` sql SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv @@ -180,9 +172,29 @@ Result: └────────────────────────┘ ``` +To restore a map from a string of key-value pairs serialized with `toString`: + +```sql +SELECT + map('John', '33', 'Paula', '31') AS m, + toString(m) as map_serialized, + extractKeyValuePairs(map_serialized, ':', ',', '\'') AS map_restored +FORMAT Vertical; +``` + +Result: + +``` +Row 1: +────── +m: {'John':'33','Paula':'31'} +map_serialized: {'John':'33','Paula':'31'} +map_restored: {'John':'33','Paula':'31'} +``` + ## extractKeyValuePairsWithEscaping -Same as `extractKeyValuePairs` but with escaping support. +Same as `extractKeyValuePairs` but supports escaping. Supported escape sequences: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`. Non standard escape sequences are returned as it is (including the backslash) unless they are one of the following: @@ -223,28 +235,14 @@ mapAdd(arg1, arg2 [, ...]) **Arguments** -Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promoted to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. +Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key.
All key arrays should have same type, and all value arrays should contain items which are promoted to the one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. **Returned value** -- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. +- Depending on the arguments returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** -Query with a tuple: - -```sql -SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type; -``` - -Result: - -```text -┌─res───────────┬─type───────────────────────────────┐ -│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │ -└───────────────┴────────────────────────────────────┘ -``` - Query with `Map` type: ```sql @@ -259,6 +257,20 @@ Result: └──────────────────────────────┘ ``` +Query with a tuple: + +```sql +SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type; +``` + +Result: + +```text +┌─res───────────┬─type───────────────────────────────┐ +│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │ +└───────────────┴────────────────────────────────────┘ +``` + ## mapSubtract Collect all the keys and subtract corresponding values. **Syntax** ```sql mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` **Arguments** -Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. +Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have same type, and all value arrays should contain items which are promoted to the one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. **Returned value** -- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. +- Depending on the arguments returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
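As a small sketch of the key collection described above (keys missing from one argument are treated as having value 0; the result in the comment is the expected outcome, not verified output):

```sql
-- 'a' occurs in both maps, 'b' only in the first, 'c' only in the second.
-- Expected result: {'a':1,'b':2,'c':-1}
SELECT mapSubtract(map('a', 2, 'b', 2), map('a', 1, 'c', 1)) AS res;
```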
**Example** -Query with a tuple map: -```sql -SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type; -``` - -Result: - -```text -┌─res────────────┬─type──────────────────────────────┐ -│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │ -└────────────────┴───────────────────────────────────┘ -``` - Query with `Map` type: ```sql @@ -307,55 +305,57 @@ Result: └───────────────────────────────────┘ ``` -## mapPopulateSeries - -Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array. - -**Syntax** +Query with a tuple map: ```sql -mapPopulateSeries(keys, values[, max]) -mapPopulateSeries(map[, max]) -``` - -Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key. - -For array arguments the number of elements in `keys` and `values` must be the same for each row. - -**Arguments** - -Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key. - -Mapped arrays: - -- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). - -- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). - -- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges). - -or - -- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md). - -**Returned value** - -- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. - -**Example** - -Query with mapped arrays: - -```sql -SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type; +SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type; ``` Result: ```text -┌─res──────────────────────────┬─type──────────────────────────────┐ -│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │ -└──────────────────────────────┴───────────────────────────────────┘ +┌─res────────────┬─type──────────────────────────────┐ +│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │ +└────────────────┴───────────────────────────────────┘ ``` +## mapPopulateSeries + +Fills missing key-value pairs in a map with integer keys. +To support extending the keys beyond the largest value, a maximum key can be specified. +More specifically, the function returns a map in which the keys form a series from the smallest to the largest key (or `max` argument if specified) with a step size of 1, and corresponding values.
+If no value is specified for a key, a default value is used as value. +In case keys repeat, only the first value (in order of appearance) is associated with the key. + +**Syntax** + +```sql +mapPopulateSeries(map[, max]) +mapPopulateSeries(keys, values[, max]) +``` + +For array arguments the number of elements in `keys` and `values` must be the same for each row. + +**Arguments** + +Arguments are [Maps](../data-types/map.md) or two [Arrays](../data-types/array.md#data-type-array), where the first and second array contains keys and values for the each key. + +Mapped arrays: + +- `map` — Map with integer keys. [Map](../data-types/map.md). + +or + +- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges). + +**Returned value** + +- Depending on the arguments a [Map](../data-types/map.md) or a [Tuple](../data-types/tuple.md#tuplet1-t2) of two [Arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. + +**Example** + Query with `Map` type: ```sql @@ -370,9 +370,23 @@ Result: └─────────────────────────────────────────┘ ``` +Query with mapped arrays: + +```sql +SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type; +``` + +Result: + +```text +┌─res──────────────────────────┬─type──────────────────────────────┐ +│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │ +└──────────────────────────────┴───────────────────────────────────┘ +``` + ## mapContains -Determines whether the `map` contains the `key` parameter. +Returns if a given key is contained in a given map. **Syntax** @@ -382,25 +396,23 @@ mapContains(map, key) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). -- `key` — Key. Type matches the type of keys of `map` parameter. +- `map` — Map. [Map](../data-types/map.md). +- `key` — Key. Type must match the key type of `map`. **Returned value** -- `1` if `map` contains `key`, `0` if not. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `map` contains `key`, `0` if not. [UInt8](../data-types/int-uint.md). **Example** Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); +INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); -SELECT mapContains(a, 'name') FROM test; +SELECT mapContains(a, 'name') FROM tab; ``` @@ -415,9 +427,11 @@ Result: ## mapKeys -Returns all keys from the `map` parameter. +Returns the keys of a given map. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`. +This function can be optimized by enabling setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). 
+With the setting enabled, the function only reads the [keys](../data-types/map.md#map-subcolumns) subcolumn instead of the whole map. +The query `SELECT mapKeys(m) FROM table` is transformed to `SELECT m.keys FROM table`. **Syntax** @@ -427,24 +441,22 @@ mapKeys(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all keys from the `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all keys from the `map`. [Array](../data-types/array.md). **Example** Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); +INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); -SELECT mapKeys(a) FROM test; +SELECT mapKeys(a) FROM tab; ``` Result: @@ -458,9 +470,11 @@ Result: ## mapValues -Returns all values from the `map` parameter. +Returns the values of a given map. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. +This function can be optimized by enabling the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). +With the setting enabled, the function only reads the [values](../data-types/map.md#map-subcolumns) subcolumn instead of the whole map. +The query `SELECT mapValues(m) FROM table` is transformed to `SELECT m.values FROM table`. **Syntax** @@ -470,24 +484,22 @@ mapValues(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all the values from `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all the values from `map`. [Array](../data-types/array.md). **Example** Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); +INSERT INTO tab VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'}); -SELECT mapValues(a) FROM test; +SELECT mapValues(a) FROM tab; ``` Result: @@ -508,7 +520,7 @@ mapContainsKeyLike(map, pattern) ``` **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match.
**Returned value** @@ -520,11 +532,11 @@ mapContainsKeyLike(map, pattern) Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); +INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); -SELECT mapContainsKeyLike(a, 'a%') FROM test; +SELECT mapContainsKeyLike(a, 'a%') FROM tab; ``` Result: @@ -538,6 +550,8 @@ Result: ## mapExtractKeyLike +Give a map with string keys and a LIKE pattern, this function returns a map with elements where the key matches the pattern. + **Syntax** ```sql @@ -546,23 +560,23 @@ mapExtractKeyLike(map, pattern) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match. **Returned value** -- A map contained elements the key of which matches the specified pattern. If there are no elements matched the pattern, it will return an empty map. +- A map containing elements the key matching the specified pattern. If no elements match the pattern, an empty map is returned. **Example** Query: ```sql -CREATE TABLE test (a Map(String,String)) ENGINE = Memory; +CREATE TABLE tab (a Map(String, String)) ENGINE = Memory; -INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); +INSERT INTO tab VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); -SELECT mapExtractKeyLike(a, 'a%') FROM test; +SELECT mapExtractKeyLike(a, 'a%') FROM tab; ``` Result: @@ -576,6 +590,8 @@ Result: ## mapApply +Applies a function to each element of a map. + **Syntax** ```sql @@ -585,11 +601,11 @@ mapApply(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** -- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element. +- Returns a map obtained from the original map by application of `func(map1[i], ..., mapN[i])` for each element. **Example** @@ -616,6 +632,8 @@ Result: ## mapFilter +Filters a map by applying a function to each map element. + **Syntax** ```sql @@ -625,12 +643,11 @@ mapFilter(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** -- Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0. - +- Returns a map containing only the elements in `map` for which `func(map1[i], ..., mapN[i])` returns something other than 0. **Example** @@ -655,7 +672,6 @@ Result: └─────────────────────┘ ``` - ## mapUpdate **Syntax** @@ -666,8 +682,8 @@ mapUpdate(map1, map2) **Arguments** -- `map1` [Map](../../sql-reference/data-types/map.md). -- `map2` [Map](../../sql-reference/data-types/map.md). +- `map1` [Map](../data-types/map.md). +- `map2` [Map](../data-types/map.md). **Returned value** @@ -691,6 +707,9 @@ Result: ## mapConcat +Concatenates multiple maps based on the equality of their keys. 
+If elements with the same key exist in more than one input map, all elements are added to the result map, but only the first one is accessible via operator `[]` + **Syntax** ```sql @@ -699,11 +718,11 @@ mapConcat(maps) **Arguments** -- `maps` – Arbitrary number of arguments of [Map](../../sql-reference/data-types/map.md) type. +- `maps` – Arbitrarily many [Maps](../data-types/map.md). **Returned value** -- Returns a map with concatenated maps passed as arguments. If there are same keys in two or more maps, all of them are added to the result map, but only the first one is accessible via operator `[]` +- Returns a map with concatenated maps passed as arguments. **Examples** @@ -737,9 +756,12 @@ Result: ## mapExists(\[func,\], map) -Returns 1 if there is at least one key-value pair in `map` for which `func(key, value)` returns something other than 0. Otherwise, it returns 0. +Returns 1 if at least one key-value pair in `map` exists for which `func(key, value)` returns something other than 0. Otherwise, it returns 0. -Note that the `mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +:::note +`mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). +You can pass a lambda function to it as the first argument. +::: **Example** @@ -751,7 +773,7 @@ SELECT mapExists((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res Result: -```text +``` ┌─res─┐ │ 1 │ └─────┘ @@ -761,7 +783,10 @@ Result: Returns 1 if `func(key, value)` returns something other than 0 for all key-value pairs in `map`. Otherwise, it returns 0. -Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +:::note +Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). +You can pass a lambda function to it as the first argument. +::: **Example** @@ -773,7 +798,7 @@ SELECT mapAll((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res Result: -```text +``` ┌─res─┐ │ 0 │ └─────┘ @@ -781,7 +806,8 @@ Result: ## mapSort(\[func,\], map) -Sorts the elements of the `map` in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map. +Sorts the elements of a map in ascending order. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. **Examples** @@ -809,8 +835,8 @@ For more details see the [reference](../../sql-reference/functions/array-functio ## mapReverseSort(\[func,\], map) -Sorts the elements of the `map` in descending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map. - +Sorts the elements of a map in descending order. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. **Examples** @@ -834,4 +860,4 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; └──────────────────────────────┘ ``` -For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) for `arrayReverseSort` function. 
+For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort). diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..5dd1d5ceebe 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -51,7 +51,7 @@ SETTINGS cast_keep_nullable = 1 ## toInt(8\|16\|32\|64\|128\|256) -Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to a value the [Int](../data-types/int-uint.md) data type. This function family includes: - `toInt8(expr)` — Converts to a value of data type `Int8`. - `toInt16(expr)` — Converts to a value of data type `Int16`. @@ -62,7 +62,7 @@ Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/i **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -70,7 +70,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -90,7 +90,7 @@ Result: ## toInt(8\|16\|32\|64\|128\|256)OrZero -Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. +Takes an argument of type [String](../data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. **Example** @@ -151,7 +151,7 @@ Result: ## toUInt(8\|16\|32\|64\|256) -Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to the [UInt](../data-types/int-uint.md) data type. This function family includes: - `toUInt8(expr)` — Converts to a value of data type `UInt8`. - `toUInt16(expr)` — Converts to a value of data type `UInt16`. @@ -161,7 +161,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. 
+- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -169,7 +169,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for negative arguments and for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -203,9 +203,9 @@ Result: ## toDate -Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type. +Converts the argument to [Date](../data-types/date.md) data type. -If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: +If the argument is [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: ```sql SELECT @@ -219,7 +219,7 @@ SELECT └─────────────────────┴───────────────┘ ``` -If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is being used: +If the argument is a [String](../data-types/string.md), it is parsed as [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). If it was parsed as [DateTime](../data-types/datetime.md), the date component is being used: ```sql SELECT @@ -247,7 +247,7 @@ SELECT └────────────┴───────────────────────────────────────────┘ ``` -If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone: +If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](../data-types/datetime.md), then truncated to [Date](../data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](../data-types/date.md) depends on the timezone: ```sql SELECT @@ -276,7 +276,7 @@ date_Samoa_2: 2022-12-31 The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones. 
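The 65535 cutoff mentioned above can also be checked directly. This is a hedged sketch (the exact rendering may differ); values at or below 65535 are instead treated as day numbers, as described just below.

```sql
SELECT
    toDate(65535) AS as_day_number,        -- 65535 days after 1970-01-01, i.e. the Date maximum 2149-06-06
    toDate(65536, 'UTC') AS as_timestamp;  -- interpreted as a Unix timestamp and truncated, i.e. 1970-01-01
```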
-If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: +If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](../data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: ```sql SELECT toDate(12345) @@ -317,7 +317,7 @@ SELECT ## toDateOrZero -The same as [toDate](#todate) but returns lower boundary of [Date](/docs/en/sql-reference/data-types/date.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns lower boundary of [Date](../data-types/date.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -338,7 +338,7 @@ Result: ## toDateOrNull -The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -359,7 +359,7 @@ Result: ## toDateOrDefault -Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](/docs/en/sql-reference/data-types/date.md). +Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](../data-types/date.md). **Syntax** @@ -386,7 +386,7 @@ Result: ## toDateTime -Converts an input value to [DateTime](/docs/en/sql-reference/data-types/datetime.md). +Converts an input value to [DateTime](../data-types/datetime.md). **Syntax** @@ -396,18 +396,18 @@ toDateTime(expr[, time_zone ]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). -- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — The value. [String](../data-types/string.md), [Int](../data-types/int-uint.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). +- `time_zone` — Time zone. [String](../data-types/string.md). :::note If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp). -If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. +If `expr` is a [String](../data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed. ::: **Returned value** -- A date time. [DateTime](/docs/en/sql-reference/data-types/datetime.md) +- A date time. 
[DateTime](../data-types/datetime.md) **Example** @@ -428,7 +428,7 @@ Result: ## toDateTimeOrZero -The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](../data-types/datetime.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -449,7 +449,7 @@ Result: ## toDateTimeOrNull -The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -470,7 +470,7 @@ Result: ## toDateTimeOrDefault -Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md). +Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](../data-types/datetime.md). **Syntax** @@ -497,7 +497,7 @@ Result: ## toDate32 -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. +Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, it's borders are taken into account. **Syntax** @@ -507,11 +507,11 @@ toDate32(expr) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md) or [Date](../data-types/date.md). **Returned value** -- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). +- A calendar date. Type [Date32](../data-types/date32.md). **Example** @@ -539,7 +539,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value); └────────────┴────────────────────────────────────┘ ``` -3. With [Date](/docs/en/sql-reference/data-types/date.md) argument: +3. With [Date](../data-types/date.md) argument: ``` sql SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); @@ -553,7 +553,7 @@ SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); ## toDate32OrZero -The same as [toDate32](#todate32) but returns the min value of [Date32](/docs/en/sql-reference/data-types/date32.md) if an invalid argument is received. +The same as [toDate32](#todate32) but returns the min value of [Date32](../data-types/date32.md) if an invalid argument is received. **Example** @@ -593,7 +593,7 @@ Result: ## toDate32OrDefault -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. 
If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received. +Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received. **Example** @@ -615,7 +615,7 @@ Result: ## toDateTime64 -Converts the argument to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. +Converts the argument to the [DateTime64](../data-types/datetime64.md) data type. **Syntax** @@ -625,15 +625,13 @@ toDateTime64(expr, scale, [timezone]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md). - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. - `timezone` - Time zone of the specified datetime64 object. **Returned value** -- A calendar date and time of day, with sub-second precision. - -Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- A calendar date and time of day, with sub-second precision. [DateTime64](../data-types/datetime64.md). **Example** @@ -694,7 +692,7 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDecimal(32\|64\|128\|256) -Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. +Converts `value` to the [Decimal](../data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. - `toDecimal32(value, S)` - `toDecimal64(value, S)` @@ -703,7 +701,7 @@ Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) ## toDecimal(32\|64\|128\|256)OrNull -Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: +Converts an input string to a [Nullable(Decimal(P,S))](../data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. - `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. @@ -714,7 +712,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. 
ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -757,7 +755,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrDefault -Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: +Converts an input string to a [Decimal(P,S)](../data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. @@ -768,7 +766,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -810,7 +808,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrZero -Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: +Converts an input value to the [Decimal(P,S)](../data-types/decimal.md) data type. This family of functions includes: - `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. @@ -821,7 +819,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -921,7 +919,7 @@ Also see the `toUnixTimestamp` function. ## toFixedString(s, N) -Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N). +Converts a [String](../data-types/string.md) type argument to a [FixedString(N)](../data-types/fixedstring.md) type (a string of fixed length N). If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown. ## toStringCutToZero(s) @@ -970,14 +968,14 @@ toDecimalString(number, scale) **Arguments** -- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), -- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
- * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), - * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. +- `number` — Value to be represented as String, [Int, UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](../data-types/int-uint.md). + * Maximum scale for [Decimal](../data-types/decimal.md) and [Int, UInt](../data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), + * Maximum scale for [Float](../data-types/float.md) is 60. **Returned value** -- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). +- Input value represented as [String](../data-types/string.md) with given number of fractional digits (scale). The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. **Example** @@ -996,33 +994,689 @@ Result: └─────────────────────────────────────────────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64) +## reinterpretAsUInt8 -## reinterpretAsInt(8\|16\|32\|64) +Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. -## reinterpretAsFloat(32\|64) +**Syntax** + +```sql +reinterpretAsUInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt8. [UInt8](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsUInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ UInt8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt16 + +Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt16(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt16. [UInt16](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). 
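To make the caveat about non-representable values concrete: when the target type is narrower than the input, only the leading (little-endian, least significant) bytes survive. A hedged sketch:

```sql
-- 513 is 0x0201; reading only the first byte of its little-endian representation gives 0x01 = 1.
SELECT reinterpretAsUInt8(toUInt16(513)) AS res;
```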
+ +**Example** + +Query: + +```sql +SELECT + toUInt8(257) AS x, + toTypeName(x), + reinterpretAsUInt16(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ UInt8 │ 1 │ UInt16 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt32 + +Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt32(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt32. [UInt32](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt16(257) AS x, + toTypeName(x), + reinterpretAsUInt32(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt16 │ 257 │ UInt32 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt64 + +Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt64(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt64. [UInt64](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt32(257) AS x, + toTypeName(x), + reinterpretAsUInt64(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt32 │ 257 │ UInt64 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt128 + +Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt128(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt128. [UInt128](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). 
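Because [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md) inputs are accepted as well, the leading bytes of a string can be reinterpreted as an integer. A hedged sketch (expected value shown in the comment):

```sql
-- 'abcd' has bytes 0x61 0x62 0x63 0x64; read as a little-endian UInt32 this is 0x64636261 = 1684234849.
SELECT reinterpretAsUInt32('abcd') AS res;
```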
+ +**Example** + +Query: + +```sql +SELECT + toUInt64(257) AS x, + toTypeName(x), + reinterpretAsUInt128(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt64 │ 257 │ UInt128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt256 + +Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsUInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as UInt256. [UInt256](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt128(257) AS x, + toTypeName(x), + reinterpretAsUInt256(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt128 │ 257 │ UInt256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt8 + +Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int8. [Int8](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toUInt8(257) AS x, + toTypeName(x), + reinterpretAsInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ UInt8 │ 1 │ Int8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt16 + +Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt16(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int16. [Int16](../data-types/int-uint.md/#int-ranges). 
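The examples in this family reuse small positive values, so it is worth noting that the sign bit is reinterpreted as-is rather than converted. A hedged sketch:

```sql
-- 255 is 0xFF; the same byte read as a two's-complement Int8 is -1.
SELECT reinterpretAsInt8(toUInt8(255)) AS res;
```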
+ +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsInt16(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ Int16 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt32 + +Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt32(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int32. [Int32](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt16(257) AS x, + toTypeName(x), + reinterpretAsInt32(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int16 │ 257 │ Int32 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt64 + +Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt64(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int64. [Int64](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt32(257) AS x, + toTypeName(x), + reinterpretAsInt64(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int32 │ 257 │ Int64 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt128 + +Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt128(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int128. [Int128](../data-types/int-uint.md/#int-ranges). 
+ +**Example** + +Query: + +```sql +SELECT + toInt64(257) AS x, + toTypeName(x), + reinterpretAsInt128(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int64 │ 257 │ Int128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt256 + +Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Int256. [Int256](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt128(257) AS x, + toTypeName(x), + reinterpretAsInt256(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int128 │ 257 │ Int256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsFloat32 + +Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsFloat32(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Float32. [Float32](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt32(toFloat32(0.2)) as x, reinterpretAsFloat32(x); +``` + +Result: + +```response +┌──────────x─┬─reinterpretAsFloat32(x)─┐ +│ 1045220557 │ 0.2 │ +└────────────┴─────────────────────────┘ +``` + +## reinterpretAsFloat64 + +Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. + +**Syntax** + +```sql +reinterpretAsFloat64(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Reinterpreted value `x` as Float64. [Float64](../data-types/float.md). 
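Since the bytes are taken verbatim, reinterpreting an integer as a float generally yields a value with no arithmetic relation to the input, which is exactly the caveat stated above. A hedged sketch:

```sql
-- The bit pattern 0x0000000000000001 is the smallest positive denormal Float64 (about 5e-324).
SELECT reinterpretAsFloat64(toUInt64(1)) AS res;
```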
+ +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt64(toFloat64(0.2)) as x, reinterpretAsFloat64(x); +``` + +Result: + +```response +┌───────────────────x─┬─reinterpretAsFloat64(x)─┐ +│ 4596373779694328218 │ 0.2 │ +└─────────────────────┴─────────────────────────┘ +``` ## reinterpretAsDate +Accepts a string, fixed string or numeric value and interprets the bytes as a number in host order (little endian). It returns a date from the interpreted number as the number of days since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDate(x) +``` + +**Parameters** + +- `x`: number of days since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Date. [Date](../data-types/date.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. +::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDate(65), reinterpretAsDate('A'); +``` + +Result: + +```response +┌─reinterpretAsDate(65)─┬─reinterpretAsDate('A')─┐ +│ 1970-03-07 │ 1970-03-07 │ +└───────────────────────┴────────────────────────┘ +``` + ## reinterpretAsDateTime -These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. +These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). Returns a date with time interpreted as the number of seconds since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDateTime(x) +``` + +**Parameters** + +- `x`: number of seconds since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). + +**Returned value** + +- Date and Time. [DateTime](../data-types/datetime.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. +::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDateTime(65), reinterpretAsDateTime('A'); +``` + +Result: + +```response +┌─reinterpretAsDateTime(65)─┬─reinterpretAsDateTime('A')─┐ +│ 1970-01-01 01:01:05 │ 1970-01-01 01:01:05 │ +└───────────────────────────┴────────────────────────────┘ +``` ## reinterpretAsString -This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. 
For example, a UInt32 type value of 255 is a string that is one byte long. +This function accepts a number, date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. + +**Syntax** + +```sql +reinterpretAsString(x) +``` + +**Parameters** + +- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md). + +**Returned value** + +- String containing bytes representing `x`. [String](../data-types/fixedstring.md). + +**Example** + +Query: + +```sql +SELECT + reinterpretAsString(toDateTime('1970-01-01 01:01:05')), + reinterpretAsString(toDate('1970-03-07')); +``` + +Result: + +```response +┌─reinterpretAsString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsString(toDate('1970-03-07'))─┐ +│ A │ A │ +└────────────────────────────────────────────────────────┴───────────────────────────────────────────┘ +``` ## reinterpretAsFixedString -This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. +This function accepts a number, date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. + +**Syntax** + +```sql +reinterpretAsFixedString(x) +``` + +**Parameters** + +- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md). + +**Returned value** + +- Fixed string containing bytes representing `x`. [FixedString](../data-types/fixedstring.md). + +**Example** + +Query: + +```sql +SELECT + reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')), + reinterpretAsFixedString(toDate('1970-03-07')); +``` + +Result: + +```response +┌─reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsFixedString(toDate('1970-03-07'))─┐ +│ A │ A │ +└─────────────────────────────────────────────────────────────┴────────────────────────────────────────────────┘ +``` ## reinterpretAsUUID :::note -In addition to the UUID functions listed here, there is dedicated [UUID function documentation](/docs/en/sql-reference/functions/uuid-functions.md). +In addition to the UUID functions listed here, there is dedicated [UUID function documentation](../functions/uuid-functions.md). ::: -Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. +Accepts a 16 byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. 
**Syntax** @@ -1032,11 +1686,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](../data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). +- The UUID type value. [UUID](../data-types/uuid.md/#uuid-data-type). **Examples** @@ -1089,7 +1743,7 @@ reinterpret(x, type) **Arguments** - `x` — Any type. -- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). +- `type` — Destination type. [String](../data-types/string.md). **Returned value** @@ -1128,7 +1782,7 @@ x::t **Arguments** - `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). +- `T` — The name of the target data type. [String](../data-types/string.md). - `t` — The target data type. **Returned value** @@ -1177,9 +1831,9 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to [FixedString (N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Conversion to [FixedString (N)](../data-types/fixedstring.md) only works for arguments of type [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported. +Type conversion to [Nullable](../data-types/nullable.md) and back is supported. **Example** @@ -1253,7 +1907,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c ## accurateCastOrNull(x, T) -Converts input value `x` to the specified data type `T`. Always returns [Nullable](/docs/en/sql-reference/data-types/nullable.md) type and returns [NULL](/docs/en/sql-reference/syntax.md/#null-literal) if the casted value is not representable in the target type. +Converts input value `x` to the specified data type `T`. Always returns [Nullable](../data-types/nullable.md) type and returns [NULL](../syntax.md/#null-literal) if the casted value is not representable in the target type. **Syntax** @@ -1362,7 +2016,7 @@ Result: ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) -Converts a Number type argument to an [Interval](/docs/en/sql-reference/data-types/special-data-types/interval.md) data type. +Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. **Syntax** @@ -1409,9 +2063,9 @@ Result: ## parseDateTime {#type_conversion_functions-parseDateTime} -Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). +Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). -This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime). 
+This function is the opposite operation of function [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime). **Syntax** @@ -1431,7 +2085,7 @@ Returns DateTime values parsed from input string according to a MySQL style form **Supported format specifiers** -All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except: +All format specifiers listed in [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime) except: - %Q: Quarter (1-4) **Example** @@ -1460,7 +2114,7 @@ Alias: `str_to_date`. Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. -This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). +This function is the opposite operation of function [formatDateTimeInJodaSyntax](../functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). **Syntax** @@ -1480,7 +2134,7 @@ Returns DateTime values parsed from input string according to a Joda style forma **Supported format specifiers** -All format specifiers listed in [formatDateTimeInJoda](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: +All format specifiers listed in [formatDateTimeInJoda](../functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: - S: fraction of second - z: time zone - Z: time zone offset/id @@ -1506,7 +2160,7 @@ Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTime ## parseDateTimeBestEffort ## parseDateTime32BestEffort -Converts a date and time in the [String](/docs/en/sql-reference/data-types/string.md) representation to [DateTime](/docs/en/sql-reference/data-types/datetime.md/#data_type-datetime) data type. +Converts a date and time in the [String](../data-types/string.md) representation to [DateTime](../data-types/datetime.md/#data_type-datetime) data type. The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse’s and some other date and time formats. @@ -1518,8 +2172,8 @@ parseDateTimeBestEffort(time_string [, time_zone]) **Arguments** -- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date and time to convert. [String](../data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../data-types/string.md). **Supported non-standard formats** @@ -1535,7 +2189,7 @@ If the year is not specified, it is considered to be equal to the current year. **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](../data-types/datetime.md) data type. 
**Examples** @@ -1667,7 +2321,7 @@ Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except ## parseDateTime64BestEffort -Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](/docs/en/sql-reference/functions/type-conversion-functions.md/#data_type-datetime) data type. +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](../functions/type-conversion-functions.md/#data_type-datetime) data type. **Syntax** @@ -1677,13 +2331,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Arguments** -- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date or date with time to convert. [String](../data-types/string.md). +- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](../data-types/datetime.md) data type. **Examples** @@ -1733,7 +2387,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that ## toLowCardinality -Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. +Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type. To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. @@ -1745,13 +2399,11 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `expr` — [Expression](../syntax.md/#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md/#data_types). **Returned values** -- Result of `expr`. - -Type: `LowCardinality(expr_result_type)` +- Result of `expr`. [LowCardinality](../data-types/lowcardinality.md) of the type of `expr`. **Example** @@ -1979,143 +2631,3 @@ Result: │ 2,"good" │ └───────────────────────────────────────────┘ ``` - -## snowflakeToDateTime - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). 
-- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. - -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:57:56 │ -└──────────────────────────────────────────────────────────────────┘ -``` - -## snowflakeToDateTime64 - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime64(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision. - -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:58:19.841 │ -└────────────────────────────────────────────────────────────────────┘ -``` - -## dateTimeToSnowflake - -Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTimeToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. - -**Example** - -Query: - -``` sql -WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); -``` - -Result: - -```response -┌─dateTimeToSnowflake(dt)─┐ -│ 1426860702823350272 │ -└─────────────────────────┘ -``` - -## dateTime64ToSnowflake - -Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTime64ToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. 
- -**Example** - -Query: - -``` sql -WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); -``` - -Result: - -```response -┌─dateTime64ToSnowflake(dt64)─┐ -│ 1426860704886947840 │ -└─────────────────────────────┘ -``` diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md index eb69b1779ae..dc6a803d638 100644 --- a/docs/en/sql-reference/functions/ulid-functions.md +++ b/docs/en/sql-reference/functions/ulid-functions.md @@ -18,7 +18,7 @@ generateULID([x]) **Arguments** -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** @@ -60,14 +60,12 @@ ULIDStringToDateTime(ulid[, timezone]) **Arguments** -- `ulid` — Input ULID. [String](/docs/en/sql-reference/data-types/string.md) or [FixedString(26)](/docs/en/sql-reference/data-types/fixedstring.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `ulid` — Input ULID. [String](../data-types/string.md) or [FixedString(26)](../data-types/fixedstring.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Timestamp with milliseconds precision. - -Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. [DateTime64(3)](../data-types/datetime64.md). **Usage example** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index a0b0170721c..8b3e4f44840 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -6,7 +6,33 @@ sidebar_label: URLs # Functions for Working with URLs -All these functions do not follow the RFC. They are maximally simplified for improved performance. +:::note +The functions mentioned in this section are optimized for maximum performance and for the most part do not follow the RFC-3986 standard. Functions which implement RFC-3986 have `RFC` appended to their function name and are generally slower. +::: + +You can generally use the non-`RFC` function variants when working with publicly registered domains that contain neither user strings nor `@` symbols. 
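+
+For example, for a plain public URL that contains no user info, both variants behave the same (an illustrative query, not taken from the reference examples below; both calls are expected to return `clickhouse.com`):
+
+```sql
+-- No user info and no special characters, so the non-RFC and RFC variants agree.
+SELECT domain('https://clickhouse.com/docs'), domainRFC('https://clickhouse.com/docs');
+```
+
+For a URL that does contain user info, see the [domainRFC](#domainrfc) example further down, where only the `RFC` variant returns the host name.
+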
+The table below details which symbols in a URL can (`✔`) or cannot (`✗`) be parsed by the respective `RFC` and non-`RFC` variants: + +|Symbol | non-`RFC`| `RFC` | +|-------|----------|-------| +| ' ' | ✗ |✗ | +| \t | ✗ |✗ | +| < | ✗ |✗ | +| > | ✗ |✗ | +| % | ✗ |✔* | +| { | ✗ |✗ | +| } | ✗ |✗ | +| \| | ✗ |✗ | +| \\\ | ✗ |✗ | +| ^ | ✗ |✗ | +| ~ | ✗ |✔* | +| [ | ✗ |✗ | +| ] | ✗ |✔ | +| ; | ✗ |✔* | +| = | ✗ |✔* | +| & | ✗ |✔* | + +symbols marked `*` are sub-delimiters in RFC 3986 and allowed for user info following the `@` symbol. ## Functions that Extract Parts of a URL @@ -16,21 +42,23 @@ If the relevant part isn’t present in a URL, an empty string is returned. Extracts the protocol from a URL. -Examples of typical returned values: http, https, ftp, mailto, tel, magnet… +Examples of typical returned values: http, https, ftp, mailto, tel, magnet. ### domain Extracts the hostname from a URL. +**Syntax** + ``` sql domain(url) ``` **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). -The URL can be specified with or without a scheme. Examples: +The URL can be specified with or without a protocol. Examples: ``` text svn+ssh://some.svn-hosting.com:80/repo/trunk @@ -48,10 +76,7 @@ clickhouse.com **Returned values** -- Host name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse can’t parse the input string as a URL. - -Type: `String`. +- Host name if the input string can be parsed as a URL, otherwise an empty string. [String](../data-types/string.md). **Example** @@ -65,9 +90,103 @@ SELECT domain('svn+ssh://some.svn-hosting.com:80/repo/trunk'); └────────────────────────────────────────────────────────┘ ``` +### domainRFC + +Extracts the hostname from a URL. Similar to [domain](#domain), but RFC 3986 conformant. + +**Syntax** + +``` sql +domainRFC(url) +``` + +**Arguments** + +- `url` — URL. [String](../data-types/string.md). + +**Returned values** + +- Host name if the input string can be parsed as a URL, otherwise an empty string. [String](../data-types/string.md). + +**Example** + +``` sql +SELECT + domain('http://user:password@example.com:8080/path?query=value#fragment'), + domainRFC('http://user:password@example.com:8080/path?query=value#fragment'); +``` + +``` text +┌─domain('http://user:password@example.com:8080/path?query=value#fragment')─┬─domainRFC('http://user:password@example.com:8080/path?query=value#fragment')─┐ +│ │ example.com │ +└───────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────────────────────────┘ +``` + ### domainWithoutWWW -Returns the domain and removes no more than one ‘www.’ from the beginning of it, if present. +Returns the domain without leading `www.` if present. + +**Syntax** + +```sql +domainWithoutWWW(url) +``` + +**Arguments** + +- `url` — URL. [String](../data-types/string.md). + +**Returned values** + +- Domain name if the input string can be parsed as a URL (without leading `www.`), otherwise an empty string. [String](../data-types/string.md). + +**Example** + +``` sql +SELECT domainWithoutWWW('http://paul@www.example.com:80/'); +``` + +``` text +┌─domainWithoutWWW('http://paul@www.example.com:80/')─┐ +│ example.com │ +└─────────────────────────────────────────────────────┘ +``` + +### domainWithoutWWWRFC + +Returns the domain without leading `www.` if present. Similar to [domainWithoutWWW](#domainwithoutwww) but conforms to RFC 3986. 
+ +**Syntax** + +```sql +domainWithoutWWWRFC(url) +``` + +**Arguments** + +- `url` — URL. [String](../data-types/string.md). + +**Returned values** + +- Domain name if the input string can be parsed as a URL (without leading `www.`), otherwise an empty string. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT + domainWithoutWWW('http://user:password@www.example.com:8080/path?query=value#fragment'), + domainWithoutWWWRFC('http://user:password@www.example.com:8080/path?query=value#fragment'); +``` + +Result: + +```response +┌─domainWithoutWWW('http://user:password@www.example.com:8080/path?query=value#fragment')─┬─domainWithoutWWWRFC('http://user:password@www.example.com:8080/path?query=value#fragment')─┐ +│ │ example.com │ +└─────────────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────┘ +``` ### topLevelDomain @@ -79,66 +198,314 @@ topLevelDomain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). -The URL can be specified with or without a scheme. Examples: +:::note +The URL can be specified with or without a protocol. Examples: ``` text svn+ssh://some.svn-hosting.com:80/repo/trunk some.svn-hosting.com:80/repo/trunk https://clickhouse.com/time/ ``` +::: **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse cannot parse the input string as a URL. - -Type: `String`. +- Domain name if the input string can be parsed as a URL. Otherwise, an empty string. [String](../../sql-reference/data-types/string.md). **Example** +Query: + ``` sql SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk'); ``` +Result: + ``` text ┌─topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')─┐ │ com │ └────────────────────────────────────────────────────────────────────┘ ``` +### topLevelDomainRFC + +Extracts the the top-level domain from a URL. +Similar to [topLevelDomain](#topleveldomain), but conforms to RFC 3986. + +``` sql +topLevelDomainRFC(url) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). + +:::note +The URL can be specified with or without a protocol. Examples: + +``` text +svn+ssh://some.svn-hosting.com:80/repo/trunk +some.svn-hosting.com:80/repo/trunk +https://clickhouse.com/time/ +``` +::: + +**Returned values** + +- Domain name if the input string can be parsed as a URL. Otherwise, an empty string. [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT topLevelDomain('http://foo:foo%41bar@foo.com'), topLevelDomainRFC('http://foo:foo%41bar@foo.com'); +``` + +Result: + +``` text +┌─topLevelDomain('http://foo:foo%41bar@foo.com')─┬─topLevelDomainRFC('http://foo:foo%41bar@foo.com')─┐ +│ │ com │ +└────────────────────────────────────────────────┴───────────────────────────────────────────────────┘ +``` + ### firstSignificantSubdomain -Returns the “first significant subdomain”. The first significant subdomain is a second-level domain if it is ‘com’, ‘net’, ‘org’, or ‘co’. Otherwise, it is a third-level domain. For example, `firstSignificantSubdomain (‘https://news.clickhouse.com/’) = ‘clickhouse’, firstSignificantSubdomain (‘https://news.clickhouse.com.tr/’) = ‘clickhouse’`. 
The list of “insignificant” second-level domains and other implementation details may change in the future. +Returns the “first significant subdomain”. +The first significant subdomain is a second-level domain for `com`, `net`, `org`, or `co`, otherwise it is a third-level domain. +For example, `firstSignificantSubdomain (‘https://news.clickhouse.com/’) = ‘clickhouse’, firstSignificantSubdomain (‘https://news.clickhouse.com.tr/’) = ‘clickhouse’`. +The list of "insignificant" second-level domains and other implementation details may change in the future. + +**Syntax** + +```sql +firstSignificantSubdomain(url) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- The first significant subdomain. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT firstSignificantSubdomain('http://www.example.com/a/b/c?a=b') +``` + +Result: + +```reference +┌─firstSignificantSubdomain('http://www.example.com/a/b/c?a=b')─┐ +│ example │ +└───────────────────────────────────────────────────────────────┘ +``` + +### firstSignificantSubdomainRFC + +Returns the “first significant subdomain”. +The first significant subdomain is a second-level domain for `com`, `net`, `org`, or `co`, otherwise it is a third-level domain. +For example, `firstSignificantSubdomain (‘https://news.clickhouse.com/’) = ‘clickhouse’, firstSignificantSubdomain (‘https://news.clickhouse.com.tr/’) = ‘clickhouse’`. +The list of "insignificant" second-level domains and other implementation details may change in the future. +Similar to [firstSignficantSubdomain](#firstsignificantsubdomain) but conforms to RFC 1034. + +**Syntax** + +```sql +firstSignificantSubdomainRFC(url) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- The first significant subdomain. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT + firstSignificantSubdomain('http://user:password@example.com:8080/path?query=value#fragment'), + firstSignificantSubdomainRFC('http://user:password@example.com:8080/path?query=value#fragment'); +``` + +Result: + +```reference +┌─firstSignificantSubdomain('http://user:password@example.com:8080/path?query=value#fragment')─┬─firstSignificantSubdomainRFC('http://user:password@example.com:8080/path?query=value#fragment')─┐ +│ │ example │ +└──────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` ### cutToFirstSignificantSubdomain -Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above). +Returns the part of the domain that includes top-level subdomains up to the [“first significant subdomain”](#firstsignificantsubdomain). -For example: +**Syntax** + +```sql +cutToFirstSignificantSubdomain(url) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain if possible, otherwise returns an empty string. [String](../data-types/string.md). 
+ +**Example** + +Query: + +```sql +SELECT + cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/'), + cutToFirstSignificantSubdomain('www.tr'), + cutToFirstSignificantSubdomain('tr'); +``` + +Result: + +```response +┌─cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/')─┬─cutToFirstSignificantSubdomain('www.tr')─┬─cutToFirstSignificantSubdomain('tr')─┐ +│ clickhouse.com.tr │ tr │ │ +└───────────────────────────────────────────────────────────────────┴──────────────────────────────────────────┴──────────────────────────────────────┘ +``` + +### cutToFirstSignificantSubdomainRFC + +Returns the part of the domain that includes top-level subdomains up to the [“first significant subdomain”](#firstsignificantsubdomain). +Similar to [cutToFirstSignificantSubdomain](#cuttofirstsignificantsubdomain) but conforms to RFC 3986. + +**Syntax** + +```sql +cutToFirstSignificantSubdomainRFC(url) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain if possible, otherwise returns an empty string. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT + cutToFirstSignificantSubdomain('http://user:password@example.com:8080'), + cutToFirstSignificantSubdomainRFC('http://user:password@example.com:8080'); +``` + +Result: + +```response +┌─cutToFirstSignificantSubdomain('http://user:password@example.com:8080')─┬─cutToFirstSignificantSubdomainRFC('http://user:password@example.com:8080')─┐ +│ │ example.com │ +└─────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────┘ +``` -- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. -- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`. -- `cutToFirstSignificantSubdomain('tr') = ''`. ### cutToFirstSignificantSubdomainWithWWW -Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www". +Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping `www`. -For example: +**Syntax** -- `cutToFirstSignificantSubdomainWithWWW('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. -- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`. -- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`. +```sql +cutToFirstSignificantSubdomainWithWWW(url) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain (with `www`) if possible, otherwise returns an empty string. [String](../data-types/string.md). 
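+
+The only difference from [cutToFirstSignificantSubdomain](#cuttofirstsignificantsubdomain) is whether a leading `www.` is preserved. As an illustrative comparison (the first call is expected to return `example.com`, the second `www.example.com`):
+
+```sql
+-- The plain variant drops the leading 'www.', the WithWWW variant keeps it.
+SELECT
+    cutToFirstSignificantSubdomain('https://www.example.com'),
+    cutToFirstSignificantSubdomainWithWWW('https://www.example.com');
+```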
+ +**Example** + +Query: + +```sql +SELECT + cutToFirstSignificantSubdomainWithWWW('https://news.clickhouse.com.tr/'), + cutToFirstSignificantSubdomainWithWWW('www.tr'), + cutToFirstSignificantSubdomainWithWWW('tr'); +``` + +Result: + +```response +┌─cutToFirstSignificantSubdomainWithWWW('https://news.clickhouse.com.tr/')─┬─cutToFirstSignificantSubdomainWithWWW('www.tr')─┬─cutToFirstSignificantSubdomainWithWWW('tr')─┐ +│ clickhouse.com.tr │ www.tr │ │ +└──────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────────────┴─────────────────────────────────────────────┘ +``` + +### cutToFirstSignificantSubdomainWithWWWRFC + +Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping `www`. +Similar to [cutToFirstSignificantSubdomainWithWWW](#cuttofirstsignificantsubdomaincustomwithwww) but conforms to RFC 3986. + +**Syntax** + +```sql +cutToFirstSignificantSubdomainWithWWW(url) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain (with "www") if possible, otherwise returns an empty string. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT + cutToFirstSignificantSubdomainWithWWW('http:%2F%2Fwwwww.nova@mail.ru/economicheskiy'), + cutToFirstSignificantSubdomainWithWWWRFC('http:%2F%2Fwwwww.nova@mail.ru/economicheskiy'); +``` + +Result: + +```response +┌─cutToFirstSignificantSubdomainWithWWW('http:%2F%2Fwwwww.nova@mail.ru/economicheskiy')─┬─cutToFirstSignificantSubdomainWithWWWRFC('http:%2F%2Fwwwww.nova@mail.ru/economicheskiy')─┐ +│ │ mail.ru │ +└───────────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────────────────────────────────────┘ +``` ### cutToFirstSignificantSubdomainCustom -Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. +Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. +This function can be useful if you need a fresh TLD list or if you have a custom list. -Can be useful if you need fresh TLD list or you have custom. - -Configuration example: +**Configuration example** ```xml @@ -152,19 +519,17 @@ Configuration example: **Syntax** ``` sql -cutToFirstSignificantSubdomain(URL, TLD) +cutToFirstSignificantSubdomain(url, tld) ``` **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `tld` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../../sql-reference/data-types/string.md). **Example** @@ -186,13 +551,39 @@ Result: - [firstSignificantSubdomain](#firstsignificantsubdomain). 
+### cutToFirstSignificantSubdomainCustomRFC + +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. +Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. +This function can be useful if you need a fresh TLD list or if you have a custom list. +Similar to [cutToFirstSignificantSubdomainCustom](#cuttofirstsignificantsubdomaincustom) but conforms to RFC 3986. + +**Syntax** + +``` sql +cutToFirstSignificantSubdomainRFC(url, tld) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `tld` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../../sql-reference/data-types/string.md). + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + ### cutToFirstSignificantSubdomainCustomWithWWW -Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts custom TLD list name. +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. +Accepts custom TLD list name. +It can be useful if you need a fresh TLD list or if you have a custom list. -Can be useful if you need fresh TLD list or you have custom. - -Configuration example: +**Configuration example** ```xml @@ -206,19 +597,17 @@ Configuration example: **Syntax** ```sql -cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +cutToFirstSignificantSubdomainCustomWithWWW(url, tld) ``` **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `tld` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../data-types/string.md). **Example** @@ -240,10 +629,36 @@ Result: - [firstSignificantSubdomain](#firstsignificantsubdomain). +### cutToFirstSignificantSubdomainCustomWithWWWRFC + +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. +Accepts custom TLD list name. +It can be useful if you need a fresh TLD list or if you have a custom list. +Similar to [cutToFirstSignificantSubdomainCustomWithWWW](#cuttofirstsignificantsubdomaincustomwithwww) but conforms to RFC 3986. + +**Syntax** + +```sql +cutToFirstSignificantSubdomainCustomWithWWWRFC(url, tld) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `tld` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../../sql-reference/data-types/string.md). + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + ### firstSignificantSubdomainCustom -Returns the first significant subdomain. Accepts customs TLD list name. 
- +Returns the first significant subdomain. +Accepts customs TLD list name. Can be useful if you need fresh TLD list or you have custom. Configuration example: @@ -260,19 +675,17 @@ Configuration example: **Syntax** ```sql -firstSignificantSubdomainCustom(URL, TLD) +firstSignificantSubdomainCustom(url, tld) ``` **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `tld` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- First significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- First significant subdomain. [String](../../sql-reference/data-types/string.md). **Example** @@ -294,47 +707,156 @@ Result: - [firstSignificantSubdomain](#firstsignificantsubdomain). -### port(URL\[, default_port = 0\]) +### firstSignificantSubdomainCustomRFC -Returns the port or `default_port` if there is no port in the URL (or in case of validation error). +Returns the first significant subdomain. +Accepts customs TLD list name. +Can be useful if you need fresh TLD list or you have custom. +Similar to [firstSignificantSubdomainCustom](#firstsignificantsubdomaincustom) but conforms to RFC 3986. + +**Syntax** + +```sql +firstSignificantSubdomainCustomRFC(url, tld) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `tld` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- First significant subdomain. [String](../../sql-reference/data-types/string.md). + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### port + +Returns the port or `default_port` if the URL contains no port or cannot be parsed. + +**Syntax** + +```sql +port(url [, default_port = 0]) +``` + +**Arguments** + +- `url` — URL. [String](../data-types/string.md). +- `default_port` — The default port number to be returned. [UInt16](../data-types/int-uint.md). + +**Returned value** + +- Port or the default port if there is no port in the URL or in case of a validation error. [UInt16](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT port('http://paul@www.example.com:80/'); +``` + +Result: + +```response +┌─port('http://paul@www.example.com:80/')─┐ +│ 80 │ +└─────────────────────────────────────────┘ +``` + +### portRFC + +Returns the port or `default_port` if the URL contains no port or cannot be parsed. +Similar to [port](#port), but RFC 3986 conformant. + +**Syntax** + +```sql +portRFC(url [, default_port = 0]) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `default_port` — The default port number to be returned. [UInt16](../data-types/int-uint.md). + +**Returned value** + +- Port or the default port if there is no port in the URL or in case of a validation error. [UInt16](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + port('http://user:password@example.com:8080'), + portRFC('http://user:password@example.com:8080'); +``` + +Result: + +```resposne +┌─port('http://user:password@example.com:8080')─┬─portRFC('http://user:password@example.com:8080')─┐ +│ 0 │ 8080 │ +└───────────────────────────────────────────────┴──────────────────────────────────────────────────┘ +``` ### path -Returns the path. Example: `/top/news.html` The path does not include the query string. 
+Returns the path without query string. + +Example: `/top/news.html`. ### pathFull -The same as above, but including query string and fragment. Example: /top/news.html?page=2#comments +The same as above, but including query string and fragment. + +Example: `/top/news.html?page=2#comments`. ### queryString -Returns the query string. Example: page=1&lr=213. query-string does not include the initial question mark, as well as # and everything after #. +Returns the query string without the initial question mark, `#` and everything after `#`. + +Example: `page=1&lr=213`. ### fragment -Returns the fragment identifier. fragment does not include the initial hash symbol. +Returns the fragment identifier without the initial hash symbol. ### queryStringAndFragment -Returns the query string and fragment identifier. Example: page=1#29390. +Returns the query string and fragment identifier. -### extractURLParameter(URL, name) +Example: `page=1#29390`. -Returns the value of the ‘name’ parameter in the URL, if present. Otherwise, an empty string. If there are many parameters with this name, it returns the first occurrence. This function works under the assumption that the parameter name is encoded in the URL exactly the same way as in the passed argument. +### extractURLParameter(url, name) -### extractURLParameters(URL) +Returns the value of the `name` parameter in the URL, if present, otherwise an empty string is returned. +If there are multiple parameters with this name, the first occurrence is returned. +The function assumes that the parameter in the `url` parameter is encoded in the same way as in the `name` argument. -Returns an array of name=value strings corresponding to the URL parameters. The values are not decoded in any way. +### extractURLParameters(url) -### extractURLParameterNames(URL) +Returns an array of `name=value` strings corresponding to the URL parameters. +The values are not decoded. -Returns an array of name strings corresponding to the names of URL parameters. The values are not decoded in any way. +### extractURLParameterNames(url) -### URLHierarchy(URL) +Returns an array of name strings corresponding to the names of URL parameters. +The values are not decoded. -Returns an array containing the URL, truncated at the end by the symbols /,? in the path and query-string. Consecutive separator characters are counted as one. The cut is made in the position after all the consecutive separator characters. +### URLHierarchy(url) -### URLPathHierarchy(URL) +Returns an array containing the URL, truncated at the end by the symbols /,? in the path and query-string. +Consecutive separator characters are counted as one. +The cut is made in the position after all the consecutive separator characters. + +### URLPathHierarchy(url) The same as above, but without the protocol and host in the result. The / element (root) is not included. @@ -346,9 +868,10 @@ URLPathHierarchy('https://example.com/browse/CONV-6788') = ] ``` -### encodeURLComponent(URL) +### encodeURLComponent(url) Returns the encoded URL. + Example: ``` sql @@ -361,9 +884,10 @@ SELECT encodeURLComponent('http://127.0.0.1:8123/?query=SELECT 1;') AS EncodedUR └──────────────────────────────────────────────────────────┘ ``` -### decodeURLComponent(URL) +### decodeURLComponent(url) Returns the decoded URL. 
+ Example: ``` sql @@ -376,9 +900,10 @@ SELECT decodeURLComponent('http://127.0.0.1:8123/?query=SELECT%201%3B') AS Decod └────────────────────────────────────────┘ ``` -### encodeURLFormComponent(URL) +### encodeURLFormComponent(url) Returns the encoded URL. Follows rfc-1866, space(` `) is encoded as plus(`+`). + Example: ``` sql @@ -391,9 +916,10 @@ SELECT encodeURLFormComponent('http://127.0.0.1:8123/?query=SELECT 1 2+3') AS En └───────────────────────────────────────────────────────────┘ ``` -### decodeURLFormComponent(URL) +### decodeURLFormComponent(url) Returns the decoded URL. Follows rfc-1866, plain plus(`+`) is decoded as space(` `). + Example: ``` sql @@ -413,7 +939,7 @@ Extracts network locality (`username:password@host:port`) from a URL. **Syntax** ``` sql -netloc(URL) +netloc(url) ``` **Arguments** @@ -422,9 +948,7 @@ netloc(URL) **Returned value** -- `username:password@host:port`. - -Type: `String`. +- `username:password@host:port`. [String](../data-types/string.md). **Example** @@ -442,34 +966,35 @@ Result: └───────────────────────────────────────────┘ ``` -## Functions that Remove Part of a URL +## Functions that remove part of a URL If the URL does not have anything similar, the URL remains unchanged. ### cutWWW -Removes no more than one ‘www.’ from the beginning of the URL’s domain, if present. +Removes leading `www.` (if present) from the URL’s domain. ### cutQueryString -Removes query string. The question mark is also removed. +Removes query string, including the question mark. ### cutFragment -Removes the fragment identifier. The number sign is also removed. +Removes the fragment identifier, including the number sign. ### cutQueryStringAndFragment -Removes the query string and fragment identifier. The question mark and number sign are also removed. +Removes the query string and fragment identifier, including the question mark and number sign. -### cutURLParameter(URL, name) +### cutURLParameter(url, name) -Removes the `name` parameter from URL, if present. This function does not encode or decode characters in parameter names, e.g. `Client ID` and `Client%20ID` are treated as different parameter names. +Removes the `name` parameter from a URL, if present. +This function does not encode or decode characters in parameter names, e.g. `Client ID` and `Client%20ID` are treated as different parameter names. **Syntax** ``` sql -cutURLParameter(URL, name) +cutURLParameter(url, name) ``` **Arguments** @@ -479,9 +1004,7 @@ cutURLParameter(URL, name) **Returned value** -- URL with `name` URL parameter removed. - -Type: `String`. +- url with `name` URL parameter removed. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d1b833c2439..0323ae728a9 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -18,7 +18,7 @@ generateUUIDv4([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. 
+- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -90,7 +90,7 @@ generateUUIDv7([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -126,149 +126,6 @@ SELECT generateUUIDv7(1), generateUUIDv7(2); └──────────────────────────────────────┴──────────────────────────────────────┘ ``` -## generateUUIDv7ThreadMonotonic - -Generates a [UUID](../data-types/uuid.md) of [version 7](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format-04). - -The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit) to distinguish UUIDs within a millisecond (including a variant field "2", 2 bit), and a random field (32 bits). -For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. -In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. - -This function behaves like [generateUUIDv7](#generateUUIDv7) but gives no guarantee on counter monotony across different simultaneous requests. -Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs. - -``` - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | counter_high_bits | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| counter_low_bits | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ -``` - -:::note -As of April 2024, version 7 UUIDs are in draft status and their layout may change in future. -::: - -**Syntax** - -``` sql -generateUUIDv7ThreadMonotonic([expr]) -``` - -**Arguments** - -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. - -**Returned value** - -A value of type UUIDv7. - -**Usage example** - -First, create a table with a column of type UUID, then insert a generated UUIDv7 into the table. 
- -``` sql -CREATE TABLE tab (uuid UUID) ENGINE = Memory; - -INSERT INTO tab SELECT generateUUIDv7ThreadMonotonic(); - -SELECT * FROM tab; -``` - -Result: - -```response -┌─────────────────────────────────uuid─┐ -│ 018f05e2-e3b2-70cb-b8be-64b09b626d32 │ -└──────────────────────────────────────┘ -``` - -**Example with multiple UUIDs generated per row** - -```sql -SELECT generateUUIDv7ThreadMonotonic(1), generateUUIDv7ThreadMonotonic(2); - -┌─generateUUIDv7ThreadMonotonic(1)─────┬─generateUUIDv7ThreadMonotonic(2)─────┐ -│ 018f05e1-14ee-7bc5-9906-207153b400b1 │ 018f05e1-14ee-7bc5-9906-2072b8e96758 │ -└──────────────────────────────────────┴──────────────────────────────────────┘ -``` - -## generateUUIDv7NonMonotonic - -Generates a [UUID](../data-types/uuid.md) of [version 7](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format-04). - -The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits) and a random field (76 bits, including a 2-bit variant field "2"). - -This function is the fastest `generateUUIDv7*` function but it gives no monotonicity guarantees within a timestamp. - -``` - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | rand_a | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| rand_b | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ -``` - -:::note -As of April 2024, version 7 UUIDs are in draft status and their layout may change in future. -::: - -**Syntax** - -``` sql -generateUUIDv7NonMonotonic([expr]) -``` - -**Arguments** - -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. - -**Returned value** - -A value of type UUIDv7. - -**Example** - -First, create a table with a column of type UUID, then insert a generated UUIDv7 into the table. - -``` sql -CREATE TABLE tab (uuid UUID) ENGINE = Memory; - -INSERT INTO tab SELECT generateUUIDv7NonMonotonic(); - -SELECT * FROM tab; -``` - -Result: - -```response -┌─────────────────────────────────uuid─┐ -│ 018f05af-f4a8-778f-beee-1bedbc95c93b │ -└──────────────────────────────────────┘ -``` - -**Example with multiple UUIDs generated per row** - -```sql -SELECT generateUUIDv7NonMonotonic(1), generateUUIDv7NonMonotonic(2); - -┌─generateUUIDv7NonMonotonic(1) ───────┬─generateUUIDv7(2)NonMonotonic────────┐ -│ 018f05b1-8c2e-7567-a988-48d09606ae8c │ 018f05b1-8c2e-7946-895b-fcd7635da9a0 │ -└──────────────────────────────────────┴──────────────────────────────────────┘ -``` - ## empty Checks whether the input UUID is empty. @@ -289,9 +146,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str **Returned value** -- Returns `1` for an empty UUID or `0` for a non-empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty UUID or `0` for a non-empty UUID. [UInt8](../data-types/int-uint.md). 
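+
+For instance, the all-zero UUID is treated as empty, so the following query is expected to return `1` (an illustrative check that complements the example below):
+
+```sql
+-- The zero UUID counts as empty, so empty() returns 1 here.
+SELECT empty(toUUID('00000000-0000-0000-0000-000000000000'));
+```
+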
**Example** @@ -331,9 +186,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S **Returned value** -- Returns `1` for a non-empty UUID or `0` for an empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty UUID or `0` for an empty UUID. [UInt8](../data-types/int-uint.md). **Example** @@ -383,8 +236,8 @@ Result: **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). -- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). +- `string` — String of 36 characters or FixedString(36). [String](../syntax.md#string). +- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](../data-types/uuid.md). **Returned value** @@ -482,7 +335,7 @@ Result: ## UUIDStringToNum -Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). +Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). **Syntax** @@ -492,7 +345,7 @@ UUIDStringToNum(string[, variant = 1]) **Arguments** -- `string` — A [String](../../sql-reference/syntax.md#syntax-string-literal) of 36 characters or [FixedString](../../sql-reference/syntax.md#syntax-string-literal) +- `string` — A [String](../syntax.md#syntax-string-literal) of 36 characters or [FixedString](../syntax.md#syntax-string-literal) - `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -541,7 +394,7 @@ UUIDNumToString(binary[, variant = 1]) **Arguments** -- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. +- `binary` — [FixedString(16)](../data-types/fixedstring.md) as a binary representation of a UUID. - `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -580,7 +433,7 @@ Result: ## UUIDToNum -Accepts a [UUID](../../sql-reference/data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. +Accepts a [UUID](../data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. **Syntax** @@ -640,13 +493,11 @@ UUIDv7ToDateTime(uuid[, timezone]) **Arguments** - `uuid` — [UUID](../data-types/uuid.md) of version 7. 
-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).
 
 **Returned value**
 
-- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000.
-
-Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md).
+- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](../data-types/datetime64.md).
 
 **Usage examples**
 
@@ -674,7 +525,7 @@ Result:
 └──────────────────────────────────────────────────────────────────────────────────────┘
 ```
 
-## serverUUID()
+## serverUUID
 
 Returns the random UUID generated during the first start of the ClickHouse server. The UUID is stored in file `uuid` in the ClickHouse server directory (e.g. `/var/lib/clickhouse/`) and retained between server restarts.
 
@@ -686,10 +537,212 @@ serverUUID()
 
 **Returned value**
 
-- The UUID of the server.
+- The UUID of the server. [UUID](../data-types/uuid.md).
 
-Type: [UUID](../data-types/uuid.md).
+## generateSnowflakeID
+
+Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
+
+The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 bits plus a top zero bit), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
+For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
+In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
+
+Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.
+
+```
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|0| timestamp |
+├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+| | machine_id | machine_seq_num |
+└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
+```
+
+**Syntax**
+
+``` sql
+generateSnowflakeID([expr])
+```
+
+**Arguments**
+
+- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
+
+**Returned value**
+
+A value of type UInt64.
+
+**Example**
+
+First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
+
+``` sql
+CREATE TABLE tab (id UInt64) ENGINE = Memory;
+
+INSERT INTO tab SELECT generateSnowflakeID();
+
+SELECT * FROM tab;
+```
+
+Result:
+
+```response
+┌──────────────────id─┐
+│ 7199081390080409600 │
+└─────────────────────┘
+```
+
+**Example with multiple Snowflake IDs generated per row**
+
+```sql
+SELECT generateSnowflakeID(1), generateSnowflakeID(2);
+
+┌─generateSnowflakeID(1)─┬─generateSnowflakeID(2)─┐
+│ 7199081609652224000 │ 7199081609652224001 │
+└────────────────────────┴────────────────────────┘
+```
+
+## snowflakeToDateTime
+
+Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format.
+
+**Syntax**
+
+``` sql
+snowflakeToDateTime(value[, time_zone])
+```
+
+**Arguments**
+
+- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone) for the returned value. Optional. [String](../data-types/string.md).
+
+**Returned value**
+
+- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
+```
+
+Result:
+
+```response
+
+┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
+│ 2021-08-15 10:57:56 │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+## snowflakeToDateTime64
+
+Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format.
+
+**Syntax**
+
+``` sql
+snowflakeToDateTime64(value[, time_zone])
+```
+
+**Arguments**
+
+- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone) for the returned value. Optional. [String](../data-types/string.md).
+
+**Returned value**
+
+- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
+```
+
+Result:
+
+```response
+
+┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
+│ 2021-08-15 10:58:19.841 │
+└────────────────────────────────────────────────────────────────────┘
+```
+
+## dateTimeToSnowflake
+
+Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
+
+**Syntax**
+
+``` sql
+dateTimeToSnowflake(value)
+```
+
+**Arguments**
+
+- `value` — Date with time. [DateTime](../data-types/datetime.md).
+
+**Returned value**
+
+- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
+
+**Example**
+
+Query:
+
+``` sql
+WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
+```
+
+Result:
+
+```response
+┌─dateTimeToSnowflake(dt)─┐
+│ 1426860702823350272 │
+└─────────────────────────┘
+```
+
+## dateTime64ToSnowflake
+
+Converts a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
+ +**Syntax** + +``` sql +dateTime64ToSnowflake(value) +``` + +**Arguments** + +- `value` — Date with time. [DateTime64](../data-types/datetime64.md). + +**Returned value** + +- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. + +**Example** + +Query: + +``` sql +WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); +``` + +Result: + +```response +┌─dateTime64ToSnowflake(dt64)─┐ +│ 1426860704886947840 │ +└─────────────────────────────┘ +``` ## See also -- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) +- [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 043686889c4..03251f0b9af 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -432,13 +432,13 @@ regionIn(lhs, rhs\[, geobase\]) **Parameters** -- `lhs` — Lhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). -- `rhs` — Rhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). +- `lhs` — Lhs region ID from the geobase. [UInt32](../data-types/int-uint). +- `rhs` — Rhs region ID from the geobase. [UInt32](../data-types/int-uint). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. **Returned value** -- 1, if it belongs. [UInt8](../../sql-reference/data-types/int-uint). +- 1, if it belongs. [UInt8](../data-types/int-uint). - 0, if it doesn't belong. **Implementation details** diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index bfad16f8365..0257d21b30f 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -18,7 +18,7 @@ If the left side is a single column that is in the index, and the right side is Don’t list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery. -The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets. +The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or `SELECT` subquery in brackets. ClickHouse allows types to differ in the left and the right parts of `IN` subquery. In this case it converts the left side value to the type of the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function is applied. That means, that the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, it returns [NULL](../../sql-reference/syntax.md#null-literal). @@ -43,15 +43,16 @@ If the right side of the operator is the name of a table (for example, `UserID I If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query. 
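For illustration, a minimal sketch of this pattern (the table and column names below are only examples, not taken from the text above):

``` sql
-- A prepared data set that stays in RAM between queries
CREATE TABLE userid_set (UserID UInt64) ENGINE = Set;

INSERT INTO userid_set VALUES (1), (2), (3);

-- The set is reused as-is on every query instead of being rebuilt each time
SELECT count() FROM visits WHERE UserID IN userid_set;
```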
The subquery may specify more than one column for filtering tuples. + Example: ``` sql SELECT (CounterID, UserID) IN (SELECT CounterID, UserID FROM ...) FROM ... ``` -The columns to the left and right of the IN operator should have the same type. +The columns to the left and right of the `IN` operator should have the same type. -The IN operator and subquery may occur in any part of the query, including in aggregate functions and lambda functions. +The `IN` operator and subquery may occur in any part of the query, including in aggregate functions and lambda functions. Example: ``` sql @@ -81,7 +82,7 @@ ORDER BY EventDate ASC ``` For each day after March 17th, count the percentage of pageviews made by users who visited the site on March 17th. -A subquery in the IN clause is always run just one time on a single server. There are no dependent subqueries. +A subquery in the `IN` clause is always run just one time on a single server. There are no dependent subqueries. ## NULL Processing @@ -120,15 +121,15 @@ FROM t_null ## Distributed Subqueries -There are two options for IN-s with subqueries (similar to JOINs): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. +There are two options for `IN` operators with subqueries (similar to `JOIN` operators): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. :::note Remember that the algorithms described below may work differently depending on the [settings](../../operations/settings/settings.md) `distributed_product_mode` setting. ::: -When using the regular IN, the query is sent to remote servers, and each of them runs the subqueries in the `IN` or `JOIN` clause. +When using the regular `IN`, the query is sent to remote servers, and each of them runs the subqueries in the `IN` or `JOIN` clause. -When using `GLOBAL IN` / `GLOBAL JOINs`, first all the subqueries are run for `GLOBAL IN` / `GLOBAL JOINs`, and the results are collected in temporary tables. Then the temporary tables are sent to each remote server, where the queries are run using this temporary data. +When using `GLOBAL IN` / `GLOBAL JOIN`, first all the subqueries are run for `GLOBAL IN` / `GLOBAL JOIN`, and the results are collected in temporary tables. Then the temporary tables are sent to each remote server, where the queries are run using this temporary data. For a non-distributed query, use the regular `IN` / `JOIN`. @@ -152,7 +153,7 @@ SELECT uniq(UserID) FROM local_table and run on each of them in parallel, until it reaches the stage where intermediate results can be combined. Then the intermediate results will be returned to the requestor server and merged on it, and the final result will be sent to the client. -Now let’s examine a query with IN: +Now let’s examine a query with `IN`: ``` sql SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) @@ -166,7 +167,7 @@ This query will be sent to all remote servers as SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) ``` -In other words, the data set in the IN clause will be collected on each server independently, only across the data that is stored locally on each of the servers. +In other words, the data set in the `IN` clause will be collected on each server independently, only across the data that is stored locally on each of the servers. 
This will work correctly and optimally if you are prepared for this case and have spread data across the cluster servers such that the data for a single UserID resides entirely on a single server. In this case, all the necessary data will be available locally on each server. Otherwise, the result will be inaccurate. We refer to this variation of the query as “local IN”. @@ -182,7 +183,7 @@ This query will be sent to all remote servers as SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) ``` -The subquery will begin running on each remote server. Since the subquery uses a distributed table, the subquery that is on each remote server will be resent to every remote server as +The subquery will begin running on each remote server. Since the subquery uses a distributed table, the subquery that is on each remote server will be resent to every remote server as: ``` sql SELECT UserID FROM local_table WHERE CounterID = 34 @@ -190,33 +191,33 @@ SELECT UserID FROM local_table WHERE CounterID = 34 For example, if you have a cluster of 100 servers, executing the entire query will require 10,000 elementary requests, which is generally considered unacceptable. -In such cases, you should always use GLOBAL IN instead of IN. Let’s look at how it works for the query +In such cases, you should always use `GLOBAL IN` instead of `IN`. Let’s look at how it works for the query: ``` sql SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID GLOBAL IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) ``` -The requestor server will run the subquery +The requestor server will run the subquery: ``` sql SELECT UserID FROM distributed_table WHERE CounterID = 34 ``` -and the result will be put in a temporary table in RAM. Then the request will be sent to each remote server as +and the result will be put in a temporary table in RAM. Then the request will be sent to each remote server as: ``` sql SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 ``` -and the temporary table `_data1` will be sent to every remote server with the query (the name of the temporary table is implementation-defined). +The temporary table `_data1` will be sent to every remote server with the query (the name of the temporary table is implementation-defined). -This is more optimal than using the normal IN. However, keep the following points in mind: +This is more optimal than using the normal `IN`. However, keep the following points in mind: -1. When creating a temporary table, data is not made unique. To reduce the volume of data transmitted over the network, specify DISTINCT in the subquery. (You do not need to do this for a normal IN.) -2. The temporary table will be sent to all the remote servers. Transmission does not account for network topology. For example, if 10 remote servers reside in a datacenter that is very remote in relation to the requestor server, the data will be sent 10 times over the channel to the remote datacenter. Try to avoid large data sets when using GLOBAL IN. +1. When creating a temporary table, data is not made unique. To reduce the volume of data transmitted over the network, specify DISTINCT in the subquery. (You do not need to do this for a normal `IN`.) +2. The temporary table will be sent to all the remote servers. Transmission does not account for network topology. 
For example, if 10 remote servers reside in a datacenter that is very remote in relation to the requestor server, the data will be sent 10 times over the channel to the remote datacenter. Try to avoid large data sets when using `GLOBAL IN`.
 3. When transmitting data to remote servers, restrictions on network bandwidth are not configurable. You might overload the network.
-4. Try to distribute data across servers so that you do not need to use GLOBAL IN on a regular basis.
-5. If you need to use GLOBAL IN often, plan the location of the ClickHouse cluster so that a single group of replicas resides in no more than one data center with a fast network between them, so that a query can be processed entirely within a single data center.
+4. Try to distribute data across servers so that you do not need to use `GLOBAL IN` on a regular basis.
+5. If you need to use `GLOBAL IN` often, plan the location of the ClickHouse cluster so that a single group of replicas resides in no more than one data center with a fast network between them, so that a query can be processed entirely within a single data center.

 It also makes sense to specify a local table in the `GLOBAL IN` clause, in case this local table is only available on the requestor server and you want to use data from it on remote servers.

@@ -224,36 +225,38 @@ It also makes sense to specify a local table in the `GLOBAL IN` clause, in case

 You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.

-This is specially important if the `global in` query returns a large amount of data. Consider the following sql -
+This is especially important if the `GLOBAL IN` query returns a large amount of data. Consider the following SQL:
+
 ```sql
 select * from table1 where col1 global in (select col1 from table2 where <some_predicate>)
 ```

-If `some_predicate` is not selective enough, it will return large amount of data and cause performance issues. In such cases, it is wise to limit the data transfer over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default) meaning that an exception is raised when these thresholds are met.
+If `some_predicate` is not selective enough, it will return a large amount of data and cause performance issues. In such cases, it is wise to limit the data transfer over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default) meaning that an exception is raised when these thresholds are met.

 ### Distributed Subqueries and max_parallel_replicas

 When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.

 For example, the following:
+
 ```sql
 SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
 SETTINGS max_parallel_replicas=3
 ```

-is transformed on each server into
+is transformed on each server into:

 ```sql
 SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
 SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M
 ```

-where M is between 1 and 3 depending on which replica the local query is executing on.
+where `M` is between `1` and `3` depending on which replica the local query is executing on. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table. -Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN. +Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`. -One workaround if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`. +One workaround if `local_table_2` does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`. If a table doesn't have a sampling key, more flexible options for [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) can be used that can produce different and more optimal behaviour. diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index f6fb179d969..320828f0de9 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT +# ALTER TABLE ... MODIFY COMMENT Adds, modifies, or removes comment to the table, regardless if it was set before or not. Comment change is reflected in both [system.tables](../../../operations/system-tables/tables.md) and `SHOW CREATE TABLE` query. diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index b6f45b67d52..af56bec7a11 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE Statement +# ALTER TABLE ... DELETE Statement ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7961315c193..3cfb99cff83 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -42,7 +42,7 @@ These `ALTER` statements modify entities related to role-based access control: ## Mutations -`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. +`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE ... DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE ... UPDATE](/docs/en/sql-reference/statements/alter/update.md). 
They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. For `*MergeTree` tables mutations execute by **rewriting whole data parts**. There is no atomicity - parts are substituted for mutated parts as soon as they are ready and a `SELECT` query that started executing during a mutation will see data from parts that have already been mutated along with data from parts that have not been mutated yet. diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index ab7d0ca7378..0b300e5849a 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE Statements +# ALTER TABLE ... UPDATE Statements ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index e063b27424e..fb7a5bd7c03 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# ALTER TABLE … MODIFY QUERY Statement +# ALTER TABLE ... MODIFY QUERY Statement -You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process. +You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE ... MODIFY QUERY` statement without interrupting ingestion process. This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. @@ -79,8 +79,6 @@ ORDER BY ts, event_type; │ 2020-01-03 00:00:00 │ imp │ │ 2 │ 0 │ └─────────────────────┴────────────┴─────────┴────────────┴──────┘ -SET allow_experimental_alter_materialized_view_structure=1; - ALTER TABLE mv MODIFY QUERY SELECT toStartOfDay(ts) ts, event_type, browser, count() events_cnt, @@ -178,7 +176,6 @@ SELECT * FROM mv; └───┘ ``` ```sql -set allow_experimental_alter_materialized_view_structure=1; ALTER TABLE mv MODIFY QUERY SELECT a * 2 as a FROM src_table; INSERT INTO src_table (a) VALUES (3), (4); SELECT * FROM mv; @@ -198,6 +195,6 @@ SELECT * FROM mv; `ALTER LIVE VIEW ... REFRESH` statement refreshes a [Live view](../create/view.md#live-view). See [Force Live View Refresh](../create/view.md#live-view-alter-refresh). -## ALTER TABLE … MODIFY REFRESH Statement +## ALTER TABLE ... MODIFY REFRESH Statement `ALTER TABLE ... MODIFY REFRESH` statement changes refresh parameters of a [Refreshable Materialized View](../create/view.md#refreshable-materialized-view). See [Changing Refresh Parameters](../create/view.md#changing-refresh-parameters). 
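As a rough sketch (the view name and schedule below are illustrative, not taken from the text above), switching an existing refreshable materialized view to an hourly schedule could look like:

``` sql
-- Assumes a refreshable materialized view named `mv` already exists
ALTER TABLE mv MODIFY REFRESH EVERY 1 HOUR;
```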
diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 0edf158e981..628fe1d2875 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -337,7 +337,7 @@ Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5; Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). -## Column Compression Codecs +## Column Compression Codecs {#column_compression_codec} By default, ClickHouse applies `lz4` compression in the self-managed version, and `zstd` in ClickHouse Cloud. @@ -410,6 +410,10 @@ High compression levels are useful for asymmetric scenarios, like compress once, - For compression, ZSTD_QAT tries to use an Intel® QAT offloading device ([QuickAssist Technology](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)). If no such device was found, it will fallback to ZSTD compression in software. - Decompression is always performed in software. +:::note +ZSTD_QAT is not available in ClickHouse Cloud. +::: + #### DEFLATE_QPL `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 073a3c0d246..1bdf22b35b0 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -85,6 +85,14 @@ Also note, that `materialized_views_ignore_errors` set to `true` by default for If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it. +:::note +Given that `POPULATE` works like `CREATE TABLE ... AS SELECT ...` it has limitations: +- It is not supported with Replicated database +- It is not supported in ClickHouse cloud + +Instead a separate `INSERT ... SELECT` can be used. +::: + A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data won’t be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`. The execution of [ALTER](/docs/en/sql-reference/statements/alter/view.md) queries on materialized views has limitations, for example, you can not update the `SELECT` query, so this might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. @@ -306,7 +314,7 @@ CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTE Note that elements emitted by a late firing should be treated as updated results of a previous computation. 
Instead of firing at the end of windows, the window view will fire immediately when the late event arrives. Thus, it will result in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them. -You can modify `SELECT` query that was specified in the window view by using `ALTER TABLE … MODIFY QUERY` statement. The data structure resulting in a new `SELECT` query should be the same as the original `SELECT` query when with or without `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused. +You can modify `SELECT` query that was specified in the window view by using `ALTER TABLE ... MODIFY QUERY` statement. The data structure resulting in a new `SELECT` query should be the same as the original `SELECT` query when with or without `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused. ### Monitoring New Windows diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index a76692cf291..f3dadabd25f 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -73,7 +73,7 @@ Data can be passed to the INSERT in any [format](../../interfaces/formats.md#for INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -For example, the following query format is identical to the basic version of INSERT … VALUES: +For example, the following query format is identical to the basic version of INSERT ... VALUES: ``` sql INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 4ef407a4d13..34c6016235a 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -151,6 +151,14 @@ Result: Query with `INNER` type of a join and conditions with `OR` and `AND`: +:::note + +By default, non-equal conditions are supported as long as they use columns from the same table. +For example, `t1.a = t2.key AND t1.b > 0 AND t2.b > t2.c`, because `t1.b > 0` uses columns only from `t1` and `t2.b > t2.c` uses columns only from `t2`. +However, you can try experimental support for conditions like `t1.a = t2.key AND t1.b > t2.key`, check out section below for more details. + +::: + ``` sql SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key AND t2.val > 3; ``` @@ -165,7 +173,7 @@ Result: └───┴────┴─────┘ ``` -## [experimental] Join with inequality conditions +## [experimental] Join with inequality conditions for columns from different tables :::note This feature is experimental. To use it, set `allow_experimental_join_condition` to 1 in your configuration files or by using the `SET` command: diff --git a/docs/en/sql-reference/statements/select/limit.md b/docs/en/sql-reference/statements/select/limit.md index d61a5a44b58..58fdf988bf3 100644 --- a/docs/en/sql-reference/statements/select/limit.md +++ b/docs/en/sql-reference/statements/select/limit.md @@ -17,11 +17,11 @@ If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md) The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting. ::: -## LIMIT … WITH TIES Modifier +## LIMIT ... 
WITH TIES Modifier When you set `WITH TIES` modifier for `LIMIT n[,m]` and specify `ORDER BY expr_list`, you will get in result first `n` or `n,m` rows and all rows with same `ORDER BY` fields values equal to row at position `n` for `LIMIT n` and `m` for `LIMIT n,m`. -This modifier also can be combined with [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill). +This modifier also can be combined with [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill). For example, the following query diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index d6432a7b4f8..512a58d7cd9 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -283,7 +283,7 @@ In `MaterializedView`-engine tables the optimization works with views like `SELE ## ORDER BY Expr WITH FILL Modifier -This modifier also can be combined with [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties). +This modifier also can be combined with [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties). `WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters. All missed values of `expr` column will be filled sequentially and other columns will be filled as defaults. diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index a59ef463419..aa0e0c61c4e 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -5,21 +5,21 @@ sidebar_label: WITH # WITH Clause -ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. +ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. Please note that CTEs do not guarantee the same results in all places they are called because the query will be re-executed for each use case. An example of such behavior is below ``` sql -with cte_numbers as +with cte_numbers as ( - select - num - from generateRandom('num UInt64', NULL) + select + num + from generateRandom('num UInt64', NULL) limit 1000000 ) select - count() + count() from cte_numbers where num in (select num from cte_numbers) ``` @@ -87,3 +87,226 @@ LIMIT 10; WITH test1 AS (SELECT i + 1, j + 1 FROM test1) SELECT * FROM test1; ``` + +## Recursive Queries + +The optional RECURSIVE modifier allows for a WITH query to refer to its own output. 
Example: + +**Example:** Sum integers from 1 through 100 + +```sql +WITH RECURSIVE test_table AS ( + SELECT 1 AS number +UNION ALL + SELECT number + 1 FROM test_table WHERE number < 100 +) +SELECT sum(number) FROM test_table; +``` + +``` text +┌─sum(number)─┐ +│ 5050 │ +└─────────────┘ +``` + +The general form of a recursive `WITH` query is always a non-recursive term, then `UNION ALL`, then a recursive term, where only the recursive term can contain a reference to the query's own output. Recursive CTE query is executed as follows: + +1. Evaluate the non-recursive term. Place result of non-recursive term query in a temporary working table. +2. As long as the working table is not empty, repeat these steps: + 1. Evaluate the recursive term, substituting the current contents of the working table for the recursive self-reference. Place result of recursive term query in a temporary intermediate table. + 2. Replace the contents of the working table with the contents of the intermediate table, then empty the intermediate table. + +Recursive queries are typically used to work with hierarchical or tree-structured data. For example, we can write a query that performs tree traversal: + +**Example:** Tree traversal + +First let's create tree table: + +```sql +DROP TABLE IF EXISTS tree; +CREATE TABLE tree +( + id UInt64, + parent_id Nullable(UInt64), + data String +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO tree VALUES (0, NULL, 'ROOT'), (1, 0, 'Child_1'), (2, 0, 'Child_2'), (3, 1, 'Child_1_1'); +``` + +We can traverse those tree with such query: + +**Example:** Tree traversal +```sql +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data + FROM tree t + WHERE t.id = 0 +UNION ALL + SELECT t.id, t.parent_id, t.data + FROM tree t, search_tree st + WHERE t.parent_id = st.id +) +SELECT * FROM search_tree; +``` + +```text +┌─id─┬─parent_id─┬─data──────┐ +│ 0 │ ᴺᵁᴸᴸ │ ROOT │ +│ 1 │ 0 │ Child_1 │ +│ 2 │ 0 │ Child_2 │ +│ 3 │ 1 │ Child_1_1 │ +└────┴───────────┴───────────┘ +``` + +### Search order + +To create a depth-first order, we compute for each result row an array of rows that we have already visited: + +**Example:** Tree traversal depth-first order +```sql +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data, [t.id] AS path + FROM tree t + WHERE t.id = 0 +UNION ALL + SELECT t.id, t.parent_id, t.data, arrayConcat(path, [t.id]) + FROM tree t, search_tree st + WHERE t.parent_id = st.id +) +SELECT * FROM search_tree ORDER BY path; +``` + +```text +┌─id─┬─parent_id─┬─data──────┬─path────┐ +│ 0 │ ᴺᵁᴸᴸ │ ROOT │ [0] │ +│ 1 │ 0 │ Child_1 │ [0,1] │ +│ 3 │ 1 │ Child_1_1 │ [0,1,3] │ +│ 2 │ 0 │ Child_2 │ [0,2] │ +└────┴───────────┴───────────┴─────────┘ +``` + +To create a breadth-first order, standard approach is to add column that tracks the depth of the search: + +**Example:** Tree traversal breadth-first order +```sql +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data, [t.id] AS path, toUInt64(0) AS depth + FROM tree t + WHERE t.id = 0 +UNION ALL + SELECT t.id, t.parent_id, t.data, arrayConcat(path, [t.id]), depth + 1 + FROM tree t, search_tree st + WHERE t.parent_id = st.id +) +SELECT * FROM search_tree ORDER BY depth; +``` + +```text +┌─id─┬─link─┬─data──────┬─path────┬─depth─┐ +│ 0 │ ᴺᵁᴸᴸ │ ROOT │ [0] │ 0 │ +│ 1 │ 0 │ Child_1 │ [0,1] │ 1 │ +│ 2 │ 0 │ Child_2 │ [0,2] │ 1 │ +│ 3 │ 1 │ Child_1_1 │ [0,1,3] │ 2 │ +└────┴──────┴───────────┴─────────┴───────┘ +``` + +### Cycle detection + +First let's create graph table: + +```sql +DROP TABLE IF EXISTS graph; +CREATE TABLE graph +( + 
from UInt64, + to UInt64, + label String +) ENGINE = MergeTree ORDER BY (from, to); + +INSERT INTO graph VALUES (1, 2, '1 -> 2'), (1, 3, '1 -> 3'), (2, 3, '2 -> 3'), (1, 4, '1 -> 4'), (4, 5, '4 -> 5'); +``` + +We can traverse that graph with such query: + +**Example:** Graph traversal without cycle detection +```sql +WITH RECURSIVE search_graph AS ( + SELECT from, to, label FROM graph g + UNION ALL + SELECT g.from, g.to, g.label + FROM graph g, search_graph sg + WHERE g.from = sg.to +) +SELECT DISTINCT * FROM search_graph ORDER BY from; +``` +```text +┌─from─┬─to─┬─label──┐ +│ 1 │ 4 │ 1 -> 4 │ +│ 1 │ 2 │ 1 -> 2 │ +│ 1 │ 3 │ 1 -> 3 │ +│ 2 │ 3 │ 2 -> 3 │ +│ 4 │ 5 │ 4 -> 5 │ +└──────┴────┴────────┘ +``` + +But if we add cycle in that graph, previous query will fail with `Maximum recursive CTE evaluation depth` error: + +```sql +INSERT INTO graph VALUES (5, 1, '5 -> 1'); + +WITH RECURSIVE search_graph AS ( + SELECT from, to, label FROM graph g +UNION ALL + SELECT g.from, g.to, g.label + FROM graph g, search_graph sg + WHERE g.from = sg.to +) +SELECT DISTINCT * FROM search_graph ORDER BY from; +``` + +```text +Code: 306. DB::Exception: Received from localhost:9000. DB::Exception: Maximum recursive CTE evaluation depth (1000) exceeded, during evaluation of search_graph AS (SELECT from, to, label FROM graph AS g UNION ALL SELECT g.from, g.to, g.label FROM graph AS g, search_graph AS sg WHERE g.from = sg.to). Consider raising max_recursive_cte_evaluation_depth setting.: While executing RecursiveCTESource. (TOO_DEEP_RECURSION) +``` + +The standard method for handling cycles is to compute an array of the already visited nodes: + +**Example:** Graph traversal with cycle detection +```sql +WITH RECURSIVE search_graph AS ( + SELECT from, to, label, false AS is_cycle, [tuple(g.from, g.to)] AS path FROM graph g +UNION ALL + SELECT g.from, g.to, g.label, has(path, tuple(g.from, g.to)), arrayConcat(sg.path, [tuple(g.from, g.to)]) + FROM graph g, search_graph sg + WHERE g.from = sg.to AND NOT is_cycle +) +SELECT * FROM search_graph WHERE is_cycle ORDER BY from; +``` + +```text +┌─from─┬─to─┬─label──┬─is_cycle─┬─path──────────────────────┐ +│ 1 │ 4 │ 1 -> 4 │ true │ [(1,4),(4,5),(5,1),(1,4)] │ +│ 4 │ 5 │ 4 -> 5 │ true │ [(4,5),(5,1),(1,4),(4,5)] │ +│ 5 │ 1 │ 5 -> 1 │ true │ [(5,1),(1,4),(4,5),(5,1)] │ +└──────┴────┴────────┴──────────┴───────────────────────────┘ +``` + +### Infinite queries + +It is also possible to use infinite recursive CTE queries if `LIMIT` is used in outer query: + +**Example:** Infinite recursive CTE query +```sql +WITH RECURSIVE test_table AS ( + SELECT 1 AS number +UNION ALL + SELECT number + 1 FROM test_table +) +SELECT sum(number) FROM (SELECT number FROM test_table LIMIT 100); +``` + +```text +┌─sum(number)─┐ +│ 5050 │ +└─────────────┘ +``` diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 9fec5420f97..7efbff1b42b 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -206,6 +206,32 @@ Enables background data distribution when inserting data into distributed tables SYSTEM START DISTRIBUTED SENDS [db.] [ON CLUSTER cluster_name] ``` +### STOP LISTEN + +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. + +However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. 
+ +```sql +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + +- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. +- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause. +- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause. +- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause. + +### START LISTEN + +Allows new connections to be established on the specified protocols. + +However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. + +```sql +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + + ## Managing MergeTree Tables ClickHouse can manage background processes in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. @@ -463,30 +489,16 @@ Will do sync syscall. SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ``` +### UNLOAD PRIMARY KEY -## SYSTEM STOP LISTEN - -Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. - -However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. +Unload the primary keys for the given table or for all tables. ```sql -SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +SYSTEM UNLOAD PRIMARY KEY [db.]name ``` -- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. -- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause. -- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause. -- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause. - -## SYSTEM START LISTEN - -Allows new connections to be established on the specified protocols. - -However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. - ```sql -SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +SYSTEM UNLOAD PRIMARY KEY ``` ## Managing Refreshable Materialized Views {#refreshable-materialized-views} @@ -495,7 +507,7 @@ Commands to control background tasks performed by [Refreshable Materialized View Keep an eye on [`system.view_refreshes`](../../operations/system-tables/view_refreshes.md) while using them. 
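For example, a quick way to inspect the current refresh state (a minimal sketch):

``` sql
-- Returns one row per refreshable materialized view with its current status
SELECT * FROM system.view_refreshes;
```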
-### SYSTEM REFRESH VIEW +### REFRESH VIEW Trigger an immediate out-of-schedule refresh of a given view. @@ -503,7 +515,7 @@ Trigger an immediate out-of-schedule refresh of a given view. SYSTEM REFRESH VIEW [db.]name ``` -### SYSTEM STOP VIEW, SYSTEM STOP VIEWS +### STOP VIEW, STOP VIEWS Disable periodic refreshing of the given view or all refreshable views. If a refresh is in progress, cancel it too. @@ -514,7 +526,7 @@ SYSTEM STOP VIEW [db.]name SYSTEM STOP VIEWS ``` -### SYSTEM START VIEW, SYSTEM START VIEWS +### START VIEW, START VIEWS Enable periodic refreshing for the given view or all refreshable views. No immediate refresh is triggered. @@ -525,22 +537,10 @@ SYSTEM START VIEW [db.]name SYSTEM START VIEWS ``` -### SYSTEM CANCEL VIEW +### CANCEL VIEW If there's a refresh in progress for the given view, interrupt and cancel it. Otherwise do nothing. ```sql SYSTEM CANCEL VIEW [db.]name ``` - -### SYSTEM UNLOAD PRIMARY KEY - -Unload the primary keys for the given table or for all tables. - -```sql -SYSTEM UNLOAD PRIMARY KEY [db.]name -``` - -```sql -SYSTEM UNLOAD PRIMARY KEY -``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index 8cd5a6a1424..fb5d4a3055a 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -25,7 +25,7 @@ If the `alter_sync` is set to `2` and some replicas are not active for more than ## TRUNCATE ALL TABLES ``` sql -TRUNCATE ALL TABLES [IF EXISTS] db [ON CLUSTER cluster] +TRUNCATE ALL TABLES FROM [IF EXISTS] db [ON CLUSTER cluster] ``` Removes all data from all tables in a database. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 3a63811add6..f66178afbb2 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -169,7 +169,7 @@ If your listing of files contains number ranges with leading zeros, use the cons **Example** -Query the total number of rows in files named `file000`, `file001`, … , `file999`: +Query the total number of rows in files named `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 80077ecdb33..b891d88df31 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -130,7 +130,7 @@ FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefi If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 92f904b8841..d65615e7588 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -85,7 +85,7 @@ If your listing of files contains number ranges with leading zeros, use the cons **Example** -Query the data from files named `file000`, `file001`, … , `file999`: +Query the data from files named `file000`, `file001`, ... 
, `file999`: ``` sql SELECT count(*) diff --git a/docs/en/sql-reference/table-functions/loop.md b/docs/en/sql-reference/table-functions/loop.md new file mode 100644 index 00000000000..3a9367b2d10 --- /dev/null +++ b/docs/en/sql-reference/table-functions/loop.md @@ -0,0 +1,55 @@ +# loop + +**Syntax** + +``` sql +SELECT ... FROM loop(database, table); +SELECT ... FROM loop(database.table); +SELECT ... FROM loop(table); +SELECT ... FROM loop(other_table_function(...)); +``` + +**Parameters** + +- `database` — database name. +- `table` — table name. +- `other_table_function(...)` — other table function. + Example: `SELECT * FROM loop(numbers(10));` + `other_table_function(...)` here is `numbers(10)`. + +**Returned Value** + +Infinite loop to return query results. + +**Examples** + +Selecting data from ClickHouse: + +``` sql +SELECT * FROM loop(test_database, test_table); +SELECT * FROM loop(test_database.test_table); +SELECT * FROM loop(test_table); +``` + +Or using other table function: + +``` sql +SELECT * FROM loop(numbers(3)) LIMIT 7; + ┌─number─┐ +1. │ 0 │ +2. │ 1 │ +3. │ 2 │ + └────────┘ + ┌─number─┐ +4. │ 0 │ +5. │ 1 │ +6. │ 2 │ + └────────┘ + ┌─number─┐ +7. │ 0 │ + └────────┘ +``` +``` sql +SELECT * FROM loop(mysql('localhost:3306', 'test', 'test', 'user', 'password')); +... +``` \ No newline at end of file diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 970b3e52882..cbef80371a3 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -137,7 +137,7 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) @@ -248,6 +248,25 @@ FROM s3( LIMIT 5; ``` + +## Working with archives + +Suppose that we have several archive files with following URIs on S3: + +- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-10.csv.zip' +- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-11.csv.zip' +- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-12.csv.zip' + +Extracting data from these archives is possible using ::. Globs can be used both in the url part as well as in the part after :: (responsible for the name of a file inside the archive). + +``` sql +SELECT * +FROM s3( + 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-1{0..2}.csv.zip :: *.csv' +); +``` + + ## Virtual Columns {#virtual-columns} - `_path` — Path to the file. Type: `LowCardinalty(String)`. 
diff --git a/docs/ru/development/build-cross-loongarch.mdx b/docs/ru/development/build-cross-loongarch.mdx new file mode 100644 index 00000000000..62948af38cf --- /dev/null +++ b/docs/ru/development/build-cross-loongarch.mdx @@ -0,0 +1,10 @@ +--- +slug: /ru/development/build-cross-loongarch +sidebar_position: 69 +sidebar_label: Build on Linux for LoongArch64 +title: Build on Linux for LoongArch64 +--- + +import Content from '@site/docs/en/development/build-cross-loongarch.md'; + + diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index cd1297504af..a071d0fb00d 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -1,6 +1,6 @@ --- slug: /ru/development/style -sidebar_position: 69 +sidebar_position: 70 sidebar_label: "Как писать код на C++" --- @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, …), а также тернарного оператора `?:` ставятся пробелы. +**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, ...), а также тернарного оператора `?:` ставятся пробелы. ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; При необходимости, оператор может быть перенесён на новую строку. В этом случае, перед ним увеличивается отступ. -**11.** Унарные операторы `--`, `++`, `*`, `&`, … не отделяются от аргумента пробелом. +**11.** Унарные операторы `--`, `++`, `*`, `&`, ... не отделяются от аргумента пробелом. **12.** После запятой ставится пробел, а перед — нет. Аналогично для точки с запятой внутри выражения `for`. @@ -115,7 +115,7 @@ public: **16.** Если на весь файл один `namespace` и кроме него ничего существенного нет, то отступ внутри `namespace` не нужен. -**17.** Если блок для выражения `if`, `for`, `while`, … состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, … +**17.** Если блок для выражения `if`, `for`, `while`, ... состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, ... Если внутренний `statement` содержит фигурные скобки или `else`, то внешний блок следует писать в фигурных скобках. @@ -266,7 +266,7 @@ void executeQuery( Пример взят с ресурса http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/. -**7.** Нельзя писать мусорные комментарии (автор, дата создания…) в начале каждого файла. +**7.** Нельзя писать мусорные комментарии (автор, дата создания...) в начале каждого файла. **8.** Однострочные комментарии начинаются с трёх слешей: `///` , многострочные с `/**`. Такие комментарии считаются «документирующими». diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index 72087b56652..cf43eef73e3 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **Example** -Создадим таблицу с именами `file000`, `file001`, … , `file999`: +Создадим таблицу с именами `file000`, `file001`, ... 
, `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 720aa589122..a1c69df4d0a 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -73,7 +73,7 @@ SELECT * FROM s3_engine_table LIMIT 2; **Пример подстановки 1** -Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md index 46597c94370..c3203804211 100644 --- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -66,7 +66,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md). +Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md). Столбец `name` содержит названия кусков партиций. Значения из этого столбца можно использовать в запросах [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index faa492d4d85..49ba229b1d5 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -771,7 +771,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - В результате вставки (запрос `INSERT`). - В фоновых операциях слияний и [мутаций](../../../sql-reference/statements/alter/index.md#mutations). - При скачивании данных с другой реплики. -- В результате заморозки партиций [ALTER TABLE … FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). +- В результате заморозки партиций [ALTER TABLE ... FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). Во всех случаях, кроме мутаций и заморозки партиций, при записи куска выбирается том и диск в соответствии с указанной конфигурацией хранилища: @@ -781,7 +781,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Мутации и запросы заморозки партиций в реализации используют [жесткие ссылки](https://ru.wikipedia.org/wiki/%D0%96%D1%91%D1%81%D1%82%D0%BA%D0%B0%D1%8F_%D1%81%D1%81%D1%8B%D0%BB%D0%BA%D0%B0). Жесткие ссылки между различными дисками не поддерживаются, поэтому в случае таких операций куски размещаются на тех же дисках, что и исходные. В фоне куски перемещаются между томами на основе информации о занятом месте (настройка `move_factor`) по порядку, в котором указаны тома в конфигурации. 
Данные никогда не перемещаются с последнего тома и на первый том. Следить за фоновыми перемещениями можно с помощью системных таблиц [system.part_log](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-part-log) (поле `type = MOVE_PART`) и [system.parts](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-parts) (поля `path` и `disk`). Также подробная информация о перемещениях доступна в логах сервера. -С помощью запроса [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. +С помощью запроса [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. Перемещения данных не взаимодействуют с репликацией данных, поэтому на разных репликах одной и той же таблицы могут быть указаны разные политики хранения. diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 881566e5f34..3d9737096f5 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -31,7 +31,7 @@ ClickHouse позволяет отправить на сервер данные, - **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, ... - **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. diff --git a/docs/ru/faq/general/olap.md b/docs/ru/faq/general/olap.md index c9021f7c92e..bcfe9663381 100644 --- a/docs/ru/faq/general/olap.md +++ b/docs/ru/faq/general/olap.md @@ -9,13 +9,13 @@ sidebar_position: 100 [OLAP](https://ru.wikipedia.org/wiki/OLAP) (OnLine Analytical Processing) переводится как обработка данных в реальном времени. Это широкий термин, который можно рассмотреть с двух сторон: с технической и с точки зрения бизнеса. Для самого общего понимания можно просто прочитать его с конца: **Processing** - Обрабатываются некие исходные данные… + Обрабатываются некие исходные данные... **Analytical** -: … чтобы получить какие-то аналитические отчеты или новые знания… +: ... чтобы получить какие-то аналитические отчеты или новые знания... **OnLine** -: … в реальном времени, практически без задержек на обработку. +: ... 
в реальном времени, практически без задержек на обработку. ## OLAP с точки зрения бизнеса {#olap-from-the-business-perspective} diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 12d0c18c3a1..a42033e7d41 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (Импорт данных напрямую из Postgres также возможен с использованием `COPY ... TO PROGRAM`.) -К сожалению, все поля, связанные с погодой (precipitation…average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. +К сожалению, все поля, связанные с погодой (precipitation...average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. Для начала мы создадим таблицу на одном сервере. Позже мы сделаем таблицу распределенной. diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 59650826659..aee445da843 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -38,26 +38,6 @@ sudo service clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you've set up a password. ``` -
- -Устаревший способ установки deb-пакетов - -``` bash -sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 - -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ - /etc/apt/sources.list.d/clickhouse.list -sudo apt-get update - -sudo apt-get install -y clickhouse-server clickhouse-client - -sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. -``` - -
- Чтобы использовать различные [версии ClickHouse](../faq/operations/production.md) в зависимости от ваших потребностей, вы можете заменить `stable` на `lts` или `testing`. Также вы можете вручную скачать и установить пакеты из [репозитория](https://packages.clickhouse.com/deb/pool/stable). @@ -110,22 +90,6 @@ sudo systemctl status clickhouse-server clickhouse-client # илм "clickhouse-client --password" если установлен пароль ``` -
- -Устаревший способ установки rpm-пакетов - -``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client - -sudo /etc/init.d/clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. -``` - -
- Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`. Для непосредственной установки пакетов необходимо выполнить следующие команды: @@ -178,33 +142,6 @@ tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \ sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` -
- -Устаревший способ установки из архивов tgz - -``` bash -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ - grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh -``` -
- Для продуктивных окружений рекомендуется использовать последнюю `stable`-версию. Её номер также можно найти на github с на вкладке https://github.com/ClickHouse/ClickHouse/tags c постфиксом `-stable`. ### Из Docker образа {#from-docker-image} diff --git a/docs/ru/index.md b/docs/ru/index.md index 29f2bbe07fb..02be8912b94 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -12,10 +12,10 @@ ClickHouse — столбцовая система управления база | Строка | WatchID | JavaEnable | Title | GoodEvent | EventTime | |--------|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | То есть, значения, относящиеся к одной строке, физически хранятся рядом. @@ -24,13 +24,13 @@ ClickHouse — столбцовая система управления база В столбцовых СУБД данные хранятся в таком порядке: -| Строка: | #0 | #1 | #2 | #N | +| Строка: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | В примерах изображён только порядок расположения данных. То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index a9280de9c7b..4ed42b6fb22 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -119,6 +119,7 @@ Hello\nworld Hello\ world ``` +`\n\r` (CRLF) поддерживается с помощью настройки `input_format_tsv_crlf_end_of_line`. Второй вариант поддерживается, так как его использует MySQL при записи tab-separated дампа. diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index d1d38a587c6..e82a5a008eb 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -260,7 +260,7 @@ FORMAT Null; Ограничивает количество строк в хэш-таблице, используемой при соединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. @@ -277,7 +277,7 @@ FORMAT Null; Ограничивает размер (в байтах) хэш-таблицы, используемой при объединении таблиц. 
-Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 2b3607dcf08..3a70a0bac12 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1859,7 +1859,7 @@ SELECT * FROM test_table ## count_distinct_implementation {#settings-count_distinct_implementation} -Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). +Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). Возможные значения: diff --git a/docs/ru/operations/utilities/backupview.md b/docs/ru/operations/utilities/backupview.md index 702fafadc17..671d41cb016 100644 --- a/docs/ru/operations/utilities/backupview.md +++ b/docs/ru/operations/utilities/backupview.md @@ -1,5 +1,5 @@ --- -slug: /en/operations/utilities/backupview +slug: /ru/operations/utilities/backupview title: clickhouse_backupview --- diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 6463f6bd95d..e6a61d9b381 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM В этом случае необходимо помнить, что границы корзин гистограммы не известны. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} Проверяет, содержит ли последовательность событий цепочку, которая соответствует указанному шаблону. @@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} Вычисляет количество цепочек событий, соответствующих шаблону. Функция обнаруживает только непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md index fed0f8b328b..a0a430f7a68 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles {#quantiles} -Синтаксис: `quantiles(level1, level2, …)(x)` +Синтаксис: `quantiles(level1, level2, ...)(x)` Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. 
Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями. diff --git a/docs/ru/sql-reference/data-types/aggregatefunction.md b/docs/ru/sql-reference/data-types/aggregatefunction.md index e42b467e4af..0481151c7e4 100644 --- a/docs/ru/sql-reference/data-types/aggregatefunction.md +++ b/docs/ru/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction {#data-type-aggregatefunction} -Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. +Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(...), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. -`AggregateFunction(name, types_of_arguments…)` — параметрический тип данных. +`AggregateFunction(name, types_of_arguments...)` — параметрический тип данных. **Параметры** diff --git a/docs/ru/sql-reference/data-types/fixedstring.md b/docs/ru/sql-reference/data-types/fixedstring.md index d7a4e865903..56a5632f88d 100644 --- a/docs/ru/sql-reference/data-types/fixedstring.md +++ b/docs/ru/sql-reference/data-types/fixedstring.md @@ -21,8 +21,8 @@ sidebar_label: FixedString(N) Примеры значений, которые можно эффективно хранить в столбцах типа `FixedString`: - Двоичное представление IP-адреса (`FixedString(16)` для IPv6). -- Коды языков (ru_RU, en_US … ). -- Коды валют (USD, RUB … ). +- Коды языков (ru_RU, en_US ... ). +- Коды валют (USD, RUB ... ). - Двоичное представление хэшей (`FixedString(16)` для MD5, `FixedString(32)` для SHA256). Для хранения значений UUID используйте тип данных [UUID](uuid.md). diff --git a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md index 4ec8333d563..8fd293a0415 100644 --- a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md @@ -3,7 +3,7 @@ slug: /ru/sql-reference/data-types/nested-data-structures/nested --- # Nested {#nested} -## Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +## Nested(Name1 Type1, Name2 Type2, ...) {#nestedname1-type1-name2-type2} Вложенная структура данных - это как будто вложенная таблица. Параметры вложенной структуры данных - имена и типы столбцов, указываются так же, как у запроса CREATE. Каждой строке таблицы может соответствовать произвольное количество строк вложенной структуры данных. 
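As a minimal sketch of the nested structure described above (the table, column names, and values are invented for the example), a `Nested` column is declared and read back like this:

``` sql
-- Each row stores parallel arrays Goals.ID and Goals.Price of equal length.
CREATE TABLE visits_sketch
(
    UserID UInt64,
    Goals Nested(ID UInt32, Price Int64)
) ENGINE = MergeTree ORDER BY UserID;

INSERT INTO visits_sketch VALUES (1, [10, 20], [100, 200]);

-- ARRAY JOIN unfolds the nested structure into one row per goal.
SELECT UserID, Goals.ID, Goals.Price
FROM visits_sketch
ARRAY JOIN Goals;
```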
diff --git a/docs/ru/sql-reference/data-types/tuple.md b/docs/ru/sql-reference/data-types/tuple.md index 8953134d154..9d86c26c563 100644 --- a/docs/ru/sql-reference/data-types/tuple.md +++ b/docs/ru/sql-reference/data-types/tuple.md @@ -4,7 +4,7 @@ sidebar_position: 54 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} Кортеж из элементов любого [типа](index.md#data_types). Элементы кортежа могут быть одного или разных типов. diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 1f06bdf264a..825e3f06be2 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -161,7 +161,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2); ``` -## array(x1, …), оператор \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), оператор \[x1, ...\] {#arrayx1-operator-x1} Создаёт массив из аргументов функции. Аргументы должны быть константами и иметь типы, для которых есть наименьший общий тип. Должен быть передан хотя бы один аргумент, так как иначе непонятно, какого типа создавать массив. То есть, с помощью этой функции невозможно создать пустой массив (для этого используйте функции emptyArray\*, описанные выше). @@ -308,7 +308,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Элементы, равные `NULL`, обрабатываются как обычные значения. -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) {#array-count} Возвращает количество элементов массива `arr`, для которых функция `func` возвращает не 0. Если `func` не указана - возвращает количество ненулевых элементов массива. @@ -335,7 +335,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -Возвращает массив \[1, 2, 3, …, length(arr)\] +Возвращает массив \[1, 2, 3, ..., length(arr)\] Эта функция обычно используется совместно с ARRAY JOIN. Она позволяет, после применения ARRAY JOIN, посчитать что-либо только один раз для каждого массива. Пример: @@ -375,7 +375,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) Также эта функция может быть использована в функциях высшего порядка. Например, с её помощью можно достать индексы массива для элементов, удовлетворяющих некоторому условию. -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) {#arrayenumerateuniqarr} Возвращает массив, такого же размера, как исходный, где для каждого элемента указано, какой он по счету среди элементов с таким же значением. Например: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -597,7 +597,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Элементы массива равные `NULL` обрабатываются как обычные значения. -## arraySort(\[func,\] arr, …) {#array_functions-sort} +## arraySort(\[func,\] arr, ...) {#array_functions-sort} Возвращает массив `arr`, отсортированный в восходящем порядке. Если задана функция `func`, то порядок сортировки определяется результатом применения этой функции на элементы массива `arr`. Если `func` принимает несколько аргументов, то в функцию `arraySort` нужно передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания `arraySort`. 
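A minimal sketch of the lambda form of `arraySort` described above, sorting one array by the values of a second array (the values are illustrative):

``` sql
SELECT arraySort((x, w) -> w, ['hello', 'world', '!'], [3, 1, 2]) AS res;
-- res = ['world', '!', 'hello']: elements follow the ascending order of their weights.
```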
@@ -698,11 +698,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). ::: -## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arraySort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в возрастающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} Возвращает массив `arr`, отсортированный в нисходящем порядке. Если указана функция `func`, то массив `arr` сначала сортируется в порядке, который определяется функцией `func`, а затем отсортированный массив переворачивается. Если функция `func` принимает несколько аргументов, то в функцию `arrayReverseSort` необходимо передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания функции `arrayReverseSort`. @@ -803,11 +803,11 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialReverseSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arrayReverseSort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в убывающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayUniq(arr, …) {#array-functions-arrayuniq} +## arrayUniq(arr, ...) {#array-functions-arrayuniq} Если передан один аргумент, считает количество разных элементов в массиве. Если передано несколько аргументов, считает количество разных кортежей из элементов на соответствующих позициях в нескольких массивах. @@ -1174,7 +1174,7 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); └──────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} Возвращает массив, полученный на основе результатов применения функции `func` к каждому элементу массива `arr`. @@ -1204,7 +1204,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res; Функция `arrayMap` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} Возвращает массив, содержащий только те элементы массива `arr1`, для которых функция `func` возвращает не 0. @@ -1237,7 +1237,7 @@ SELECT Функция `arrayFilter` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} Перебирает `arr1` от первого элемента к последнему и заменяет `arr1[i]` на `arr1[i - 1]`, если `func` вернула 0. 
Первый элемент `arr1` остаётся неизменным. @@ -1255,7 +1255,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Функция `arrayFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} Перебирает `arr1` от последнего элемента к первому и заменяет `arr1[i]` на `arr1[i + 1]`, если `func` вернула 0. Последний элемент `arr1` остаётся неизменным. @@ -1273,7 +1273,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Функция `arrayReverseFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) {#array-split} Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в левую часть. Массив не разбивается по первому элементу. @@ -1291,7 +1291,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arraySplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в правую часть. Массив не разбивается по последнему элементу. @@ -1309,25 +1309,25 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arrayReverseSplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} Возвращает 1, если существует хотя бы один элемент массива `arr`, для которого функция func возвращает не 0. Иначе возвращает 0. Функция `arrayExists` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} Возвращает 1, если для всех элементов массива `arr`, функция `func` возвращает не 0. Иначе возвращает 0. Функция `arrayAll` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} Возвращает первый элемент массива `arr1`, для которого функция func возвращает не 0. Функция `arrayFirst` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) 
{#array-first-index} Возвращает индекс первого элемента массива `arr1`, для которого функция func возвращает не 0. @@ -1599,7 +1599,7 @@ SELECT arraySum(x -> x*x, [2, 3]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 56ae4359bf1..bcc5f807c32 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -559,7 +559,7 @@ SELECT Описание режимов (mode): -| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя … | +| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя ... | | ----------- | -------- | -------- | ------------------ | |0|Воскресенье|0-53|с воскресеньем в этом году |1|Понедельник|0-53|с 4-мя или более днями в этом году diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 123f40ce05d..18f625bf80f 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -88,7 +88,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} Если значение существует в документе JSON, то возвращается `1`. @@ -121,7 +121,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} Возвращает длину массива JSON или объекта JSON. @@ -134,7 +134,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) {#jsontypejson-indices-or-keys} Возвращает тип значения JSON. @@ -148,13 +148,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} Парсит JSON и извлекает значение. Эти функции аналогичны функциям `visitParam`. 
@@ -168,7 +168,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200 SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 ``` -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) {#jsonextractstringjson-indices-or-keys} Парсит JSON и извлекает строку. Эта функция аналогична функции `visitParamExtractString`. @@ -186,7 +186,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} Парсит JSON и извлекает значение с заданным типом данных. @@ -207,7 +207,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} Разбор пар ключ-значение из JSON, где значение имеет тип данных ClickHouse. @@ -255,7 +255,7 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) {#jsonextractrawjson-indices-or-keys} Возвращает часть JSON в виде строки, содержащей неразобранную подстроку. @@ -267,7 +267,7 @@ text SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys\]…) {#jsonextractarrayrawjson-indices-or-keys} +## JSONExtractArrayRaw(json\[, indices_or_keys\]...) {#jsonextractarrayrawjson-indices-or-keys} Возвращает массив из элементов JSON массива, каждый из которых представлен в виде строки с неразобранными подстроками из JSON. diff --git a/docs/ru/sql-reference/functions/math-functions.md b/docs/ru/sql-reference/functions/math-functions.md index 367451a5b32..caacbb216bf 100644 --- a/docs/ru/sql-reference/functions/math-functions.md +++ b/docs/ru/sql-reference/functions/math-functions.md @@ -304,8 +304,8 @@ atan2(y, x) **Аргументы** -- `y` — координата y точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64). -- `x` — координата х точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `y` — координата y точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md). +- `x` — координата х точки, в которую проведена линия. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md). **Возвращаемое значение** @@ -341,8 +341,8 @@ hypot(x, y) **Аргументы** -- `x` — первый катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64). -- `y` — второй катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — первый катет прямоугольного треугольника. 
[Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md). +- `y` — второй катет прямоугольного треугольника. [Float64](../../sql-reference/data-types/float.md#float32-float64) или [Decimal](../../sql-reference/data-types/decimal.md). **Возвращаемое значение** diff --git a/docs/ru/sql-reference/functions/null-functions.md b/docs/ru/sql-reference/functions/null-functions.md new file mode 100644 index 00000000000..9b045d8a97d --- /dev/null +++ b/docs/ru/sql-reference/functions/null-functions.md @@ -0,0 +1,311 @@ +--- +slug: /ru/sql-reference/functions/null-functions +sidebar_position: 63 +sidebar_label: "Функции для работы с Nullable-аргументами" +--- + +# Функции для работы с Nullable-аргументами {#funktsii-dlia-raboty-s-nullable-argumentami} + +## isNull {#isnull} + +Проверяет является ли аргумент [NULL](../../sql-reference/syntax.md#null-literal). + +``` sql +isNull(x) +``` + +Синоним: `ISNULL`. + +**Аргументы** + +- `x` — значение с не составным типом данных. + +**Возвращаемое значение** + +- `1`, если `x` — `NULL`. +- `0`, если `x` — не `NULL`. + +**Пример** + +Входная таблица + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` + +Запрос + +``` sql +SELECT x FROM t_null WHERE isNull(y); +``` + +``` text +┌─x─┐ +│ 1 │ +└───┘ +``` + +## isNotNull {#isnotnull} + +Проверяет не является ли аргумент [NULL](../../sql-reference/syntax.md#null-literal). + +``` sql +isNotNull(x) +``` + +**Аргументы** + +- `x` — значение с не составным типом данных. + +**Возвращаемое значение** + +- `0`, если `x` — `NULL`. +- `1`, если `x` — не `NULL`. + +**Пример** + +Входная таблица + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` + +Запрос + +``` sql +SELECT x FROM t_null WHERE isNotNull(y); +``` + +``` text +┌─x─┐ +│ 2 │ +└───┘ +``` + +## coalesce {#coalesce} + +Последовательно слева-направо проверяет являются ли переданные аргументы `NULL` и возвращает первый не `NULL`. + +``` sql +coalesce(x,...) +``` + +**Аргументы** + +- Произвольное количество параметров не составного типа. Все параметры должны быть совместимы по типу данных. + +**Возвращаемые значения** + +- Первый не `NULL` аргумент. +- `NULL`, если все аргументы — `NULL`. + +**Пример** + +Рассмотрим адресную книгу, в которой может быть указано несколько способов связи с клиентом. + +``` text +┌─name─────┬─mail─┬─phone─────┬──icq─┐ +│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ +│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└──────────┴──────┴───────────┴──────┘ +``` + +Поля `mail` и `phone` имеют тип String, а поле `icq` — `UInt32`, его необходимо будет преобразовать в `String`. + +Получим из адресной книги первый доступный способ связаться с клиентом: + +``` sql +SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook; +``` + +``` text +┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ +│ client 1 │ 123-45-67 │ +│ client 2 │ ᴺᵁᴸᴸ │ +└──────────┴──────────────────────────────────────────────────────┘ +``` + +## ifNull {#ifnull} + +Возвращает альтернативное значение, если основной аргумент — `NULL`. + +``` sql +ifNull(x,alt) +``` + +**Аргументы** + +- `x` — значение для проверки на `NULL`, +- `alt` — значение, которое функция вернёт, если `x` — `NULL`. + +**Возвращаемые значения** + +- Значение `x`, если `x` — не `NULL`. +- Значение `alt`, если `x` — `NULL`. 
+ +**Пример** + +``` sql +SELECT ifNull('a', 'b'); +``` + +``` text +┌─ifNull('a', 'b')─┐ +│ a │ +└──────────────────┘ +``` + +``` sql +SELECT ifNull(NULL, 'b'); +``` + +``` text +┌─ifNull(NULL, 'b')─┐ +│ b │ +└───────────────────┘ +``` + +## nullIf {#nullif} + +Возвращает `NULL`, если аргументы равны. + +``` sql +nullIf(x, y) +``` + +**Аргументы** + +`x`, `y` — значения для сравнивания. Они должны быть совместимых типов, иначе ClickHouse сгенерирует исключение. + +**Возвращаемые значения** + +- `NULL`, если аргументы равны. +- Значение `x`, если аргументы не равны. + +**Пример** + +``` sql +SELECT nullIf(1, 1); +``` + +``` text +┌─nullIf(1, 1)─┐ +│ ᴺᵁᴸᴸ │ +└──────────────┘ +``` + +``` sql +SELECT nullIf(1, 2); +``` + +``` text +┌─nullIf(1, 2)─┐ +│ 1 │ +└──────────────┘ +``` + +## assumeNotNull {#assumenotnull} + +Приводит значение типа [Nullable](../../sql-reference/functions/functions-for-nulls.md) к не `Nullable`, если значение не `NULL`. + +``` sql +assumeNotNull(x) +``` + +**Аргументы** + +- `x` — исходное значение. + +**Возвращаемые значения** + +- Исходное значение с не `Nullable` типом, если оно — не `NULL`. +- Неспецифицированный результат, зависящий от реализации, если исходное значение — `NULL`. + +**Пример** + +Рассмотрим таблицу `t_null`. + +``` sql +SHOW CREATE TABLE t_null; +``` + +``` text +┌─statement─────────────────────────────────────────────────────────────────┐ +│ CREATE TABLE default.t_null ( x Int8, y Nullable(Int8)) ENGINE = TinyLog │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` + +Применим функцию `assumeNotNull` к столбцу `y`. + +``` sql +SELECT assumeNotNull(y) FROM t_null; +``` + +``` text +┌─assumeNotNull(y)─┐ +│ 0 │ +│ 3 │ +└──────────────────┘ +``` + +``` sql +SELECT toTypeName(assumeNotNull(y)) FROM t_null; +``` + +``` text +┌─toTypeName(assumeNotNull(y))─┐ +│ Int8 │ +│ Int8 │ +└──────────────────────────────┘ +``` + +## toNullable {#tonullable} + +Преобразует тип аргумента к `Nullable`. + +``` sql +toNullable(x) +``` + +**Аргументы** + +- `x` — значение произвольного не составного типа. + +**Возвращаемое значение** + +- Входное значение с типом не `Nullable`. + +**Пример** + +``` sql +SELECT toTypeName(10); +``` + +``` text +┌─toTypeName(10)─┐ +│ UInt8 │ +└────────────────┘ +``` + +``` sql +SELECT toTypeName(toNullable(10)); +``` + +``` text +┌─toTypeName(toNullable(10))─┐ +│ Nullable(UInt8) │ +└────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 835aed934d5..f7637cfa3f7 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -286,7 +286,7 @@ SELECT byteSize(NULL, 1, 0.3, ''); Превращает константу в полноценный столбец, содержащий только одно значение. В ClickHouse полноценные столбцы и константы представлены в памяти по-разному. Функции по-разному работают для аргументов-констант и обычных аргументов (выполняется разный код), хотя результат почти всегда должен быть одинаковым. Эта функция предназначена для отладки такого поведения. -## ignore(…) {#ignore} +## ignore(...) {#ignore} Принимает любые аргументы, в т.ч. `NULL`, всегда возвращает 0. При этом, аргумент всё равно вычисляется. Это может использоваться для бенчмарков. 
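A minimal benchmarking sketch of `ignore` as described above: the expression is still evaluated for every row, but the result is discarded (the hash function and row count are arbitrary choices for the example):

``` sql
-- Forces evaluation of cityHash64 over 10 million rows without producing output.
SELECT ignore(cityHash64(number)) FROM numbers(10000000) FORMAT Null;
```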
diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 276dfc2ef20..fc258f7b4cf 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -358,7 +358,7 @@ SELECT repeat('abc', 10); Разворачивает последовательность кодовых точек Unicode, при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Иначе — что-то делает (не кидает исключение). -## format(pattern, s0, s1, …) {#format} +## format(pattern, s0, s1, ...) {#format} Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` — упрощенная версия шаблона в языке Python. Шаблон содержит «заменяющие поля», которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{ '{{' }}` или `{{ '}}' }}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). @@ -493,7 +493,7 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2); ## base58Encode(plaintext), base58Decode(encoded_text) {#base58} -Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием стандартного алфавита Bitcoin. +Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://datatracker.ietf.org/doc/html/draft-msporny-base58) с использованием стандартного алфавита Bitcoin. **Синтаксис** diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 4f9ae4428a4..53da9a6e791 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -311,19 +311,19 @@ Result: Смотрите `multiSearchAllPositions`. -## multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} +## multiSearchFirstPosition(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} Так же, как и `position`, только возвращает оффсет первого вхождения любого из needles. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstPositionCaseInsensitive, multiSearchFirstPositionUTF8, multiSearchFirstPositionCaseInsensitiveUTF8`. -## multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} +## multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} Возвращает индекс `i` (нумерация с единицы) первой найденной строки needlei в строке `haystack` и 0 иначе. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`. -## multiSearchAny(haystack, \[needle1, needle2, …, needlen\]) {#function-multisearchany} +## multiSearchAny(haystack, \[needle1, needle2, ..., needlen\]) {#function-multisearchany} Возвращает 1, если хотя бы одна подстрока needlei нашлась в строке `haystack` и 0 иначе. @@ -343,30 +343,30 @@ Result: Регулярное выражение работает со строкой как с набором байт. 
Регулярное выражение не может содержать нулевые байты. Для шаблонов на поиск подстроки в строке, лучше используйте LIKE или position, так как они работают существенно быстрее. -## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} +## multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. :::note Примечание Длина любой строки из `haystack` должна быть меньше 232 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. ::: -## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} +## multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает любой индекс подходящего регулярного выражения. -## multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} +## multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке. -## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с non-fuzzy вариантами. -## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает любой индекс подходящего регулярного выражения в пределах константного редакционного расстояния. -## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния. 
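A minimal sketch of the `multiSearch*` family described above (the haystack and needles are illustrative):

``` sql
SELECT
    multiSearchAny('clickhouse.com/docs', ['docs', 'blog'])        AS any_found,    -- 1
    multiSearchFirstIndex('clickhouse.com/docs', ['blog', 'docs']) AS needle_index; -- 2 (1-based)
```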
diff --git a/docs/ru/sql-reference/functions/tuple-functions.md b/docs/ru/sql-reference/functions/tuple-functions.md index c702e5d00b1..70ae44aa627 100644 --- a/docs/ru/sql-reference/functions/tuple-functions.md +++ b/docs/ru/sql-reference/functions/tuple-functions.md @@ -9,15 +9,15 @@ sidebar_label: Функции для работы с кортежами ## tuple {#tuple} Функция, позволяющая сгруппировать несколько столбцов. -Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит. +Для столбцов, имеющих типы T1, T2, ... возвращает кортеж типа Tuple(T1, T2, ...), содержащий эти столбцы. Выполнение функции ничего не стоит. Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу. -С помощью функции реализуется оператор `(x, y, …)`. +С помощью функции реализуется оператор `(x, y, ...)`. **Синтаксис** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement {#tupleelement} diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 3c6e6151ef8..087891f4347 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -14,7 +14,7 @@ sidebar_label: "Функции для работы с URL" ### protocol {#protocol} -Возвращает протокол. Примеры: http, ftp, mailto, magnet… +Возвращает протокол. Примеры: http, ftp, mailto, magnet... ### domain {#domain} diff --git a/docs/ru/sql-reference/functions/uuid-functions.md b/docs/ru/sql-reference/functions/uuid-functions.md index a7fe6592338..7fe90263599 100644 --- a/docs/ru/sql-reference/functions/uuid-functions.md +++ b/docs/ru/sql-reference/functions/uuid-functions.md @@ -112,113 +112,6 @@ SELECT generateUUIDv7(1), generateUUIDv7(2) └──────────────────────────────────────┴──────────────────────────────────────┘ ``` -## generateUUIDv7ThreadMonotonic {#uuidv7threadmonotonic-function-generate} - -Генерирует идентификатор [UUID версии 7](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format-04). Генерируемый UUID состоит из 48-битной временной метки (Unix time в миллисекундах), маркеров версии 7 и варианта 2, монотонно возрастающего счётчика для данной временной метки и случайных данных в указанной ниже последовательности. Для каждой новой временной метки счётчик стартует с нового случайного значения, а для следующих UUIDv7 он увеличивается на единицу. В случае переполнения счётчика временная метка принудительно увеличивается на 1, и счётчик снова стартует со случайного значения. Данная функция является ускоренным аналогом функции `generateUUIDv7` за счёт потери гарантии монотонности счётчика при одной и той же метке времени между одновременно исполняемыми разными запросами. Монотонность счётчика гарантируется только в пределах одного треда, исполняющего данную функцию для генерации нескольких UUID. - -**Синтаксис** - -``` sql -generateUUIDv7ThreadMonotonic([x]) -``` - -**Аргументы** - -- `x` — [выражение](../syntax.md#syntax-expressions), возвращающее значение одного из [поддерживаемых типов данных](../data-types/index.md#data_types). Значение используется, чтобы избежать [склейки одинаковых выражений](index.md#common-subexpression-elimination), если функция вызывается несколько раз в одном запросе. Необязательный параметр. - -**Возвращаемое значение** - -Значение типа [UUID](../../sql-reference/functions/uuid-functions.md). 
- -**Пример использования** - -Этот пример демонстрирует, как создать таблицу с UUID-колонкой и добавить в нее сгенерированный UUIDv7. - -``` sql -CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog - -INSERT INTO t_uuid SELECT generateUUIDv7ThreadMonotonic() - -SELECT * FROM t_uuid -``` - -``` text -┌────────────────────────────────────x─┐ -│ 018f05e2-e3b2-70cb-b8be-64b09b626d32 │ -└──────────────────────────────────────┘ -``` - -**Пример использования, для генерации нескольких значений в одной строке** - -```sql -SELECT generateUUIDv7ThreadMonotonic(1), generateUUIDv7ThreadMonotonic(7) - -┌─generateUUIDv7ThreadMonotonic(1)─────┬─generateUUIDv7ThreadMonotonic(2)─────┐ -│ 018f05e1-14ee-7bc5-9906-207153b400b1 │ 018f05e1-14ee-7bc5-9906-2072b8e96758 │ -└──────────────────────────────────────┴──────────────────────────────────────┘ -``` - -## generateUUIDv7NonMonotonic {#uuidv7nonmonotonic-function-generate} - -Генерирует идентификатор [UUID версии 7](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format-04). Генерируемый UUID состоит из 48-битной временной метки (Unix time в миллисекундах), маркеров версии 7 и варианта 2, и случайных данных в следующей последовательности: -``` - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | rand_a | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| rand_b | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ -``` -::::note -На апрель 2024 года UUIDv7 находится в статусе черновика и его раскладка по битам может в итоге измениться. -:::: - -**Синтаксис** - -``` sql -generateUUIDv7NonMonotonic([x]) -``` - -**Аргументы** - -- `x` — [выражение](../syntax.md#syntax-expressions), возвращающее значение одного из [поддерживаемых типов данных](../data-types/index.md#data_types). Значение используется, чтобы избежать [склейки одинаковых выражений](index.md#common-subexpression-elimination), если функция вызывается несколько раз в одном запросе. Необязательный параметр. - -**Возвращаемое значение** - -Значение типа [UUID](../../sql-reference/functions/uuid-functions.md). - -**Пример использования** - -Этот пример демонстрирует, как создать таблицу с UUID-колонкой и добавить в нее сгенерированный UUIDv7. - -``` sql -CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog - -INSERT INTO t_uuid SELECT generateUUIDv7NonMonotonic() - -SELECT * FROM t_uuid -``` - -``` text -┌────────────────────────────────────x─┐ -│ 018f05af-f4a8-778f-beee-1bedbc95c93b │ -└──────────────────────────────────────┘ -``` - -**Пример использования, для генерации нескольких значений в одной строке** - -```sql -SELECT generateUUIDv7NonMonotonic(1), generateUUIDv7NonMonotonic(7) -┌─generateUUIDv7NonMonotonic(1)────────┬─generateUUIDv7NonMonotonic(2)────────┐ -│ 018f05b1-8c2e-7567-a988-48d09606ae8c │ 018f05b1-8c2e-7946-895b-fcd7635da9a0 │ -└──────────────────────────────────────┴──────────────────────────────────────┘ -``` - ## empty {#empty} Проверяет, является ли входной UUID пустым. 
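A minimal sketch of `empty` applied to UUIDs, assuming `generateUUIDv7` as documented above: the all-zero UUID counts as empty, a freshly generated one does not:

``` sql
SELECT
    empty(toUUID('00000000-0000-0000-0000-000000000000')) AS zero_uuid_is_empty,  -- 1
    empty(generateUUIDv7())                               AS fresh_uuid_is_empty; -- 0
```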
diff --git a/docs/ru/sql-reference/statements/alter/comment.md b/docs/ru/sql-reference/statements/alter/comment.md index 727af15d03e..f841c8540f3 100644 --- a/docs/ru/sql-reference/statements/alter/comment.md +++ b/docs/ru/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT {#alter-modify-comment} +# ALTER TABLE ... MODIFY COMMENT {#alter-modify-comment} Добавляет, изменяет или удаляет комментарий к таблице, независимо от того, был ли он установлен раньше или нет. Изменение комментария отражается как в системной таблице [system.tables](../../../operations/system-tables/tables.md), так и в результате выполнения запроса `SHOW CREATE TABLE`. diff --git a/docs/ru/sql-reference/statements/alter/delete.md b/docs/ru/sql-reference/statements/alter/delete.md index dc968a17349..c91a79f5cdd 100644 --- a/docs/ru/sql-reference/statements/alter/delete.md +++ b/docs/ru/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE {#alter-mutations} +# ALTER TABLE ... DELETE {#alter-mutations} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/index.md b/docs/ru/sql-reference/statements/alter/index.md index 07f5ff0a298..e8b8af39e11 100644 --- a/docs/ru/sql-reference/statements/alter/index.md +++ b/docs/ru/sql-reference/statements/alter/index.md @@ -46,7 +46,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ### Мутации {#mutations} -Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. +Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. Конвертировать существующие таблицы для работы с мутациями не нужно. Но после применения первой мутации формат данных таблицы становится несовместимым с предыдущими версиями и откатиться на предыдущую версию уже не получится. diff --git a/docs/ru/sql-reference/statements/alter/update.md b/docs/ru/sql-reference/statements/alter/update.md index b2032ac77d1..01574a8a9b7 100644 --- a/docs/ru/sql-reference/statements/alter/update.md +++ b/docs/ru/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE {#alter-table-update-statements} +# ALTER TABLE ... UPDATE {#alter-table-update-statements} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] 
WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/view.md b/docs/ru/sql-reference/statements/alter/view.md index e6f6730ff99..53e295f6bbe 100644 --- a/docs/ru/sql-reference/statements/alter/view.md +++ b/docs/ru/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# Выражение ALTER TABLE … MODIFY QUERY {#alter-modify-query} +# Выражение ALTER TABLE ... MODIFY QUERY {#alter-modify-query} -Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. +Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE ... MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. Если при создании материализованного представления использовалась конструкция `TO [db.]name`, то для изменения отсоедините представление с помощью [DETACH](../detach.md), измените таблицу с помощью [ALTER TABLE](index.md), а затем снова присоедините запрос с помощью [ATTACH](../attach.md). diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index dbd6a325c40..a03ff7b1628 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -243,7 +243,7 @@ ClickHouse поддерживает кодеки общего назначени - `Delta(delta_bytes)` — Метод, в котором исходные значения заменяются разностью двух соседних значений, за исключением первого значения, которое остаётся неизменным. Для хранения разниц используется до `delta_bytes`, т.е. `delta_bytes` — это максимальный размер исходных данных. Возможные значения `delta_bytes`: 1, 2, 4, 8. Значение по умолчанию для `delta_bytes` равно `sizeof(type)`, если результат 1, 2, 4, or 8. Во всех других случаях — 1. - `DoubleDelta` — Вычисляется разницу от разниц и сохраняет её в компакном бинарном виде. Оптимальная степень сжатия достигается для монотонных последовательностей с постоянным шагом, наподобие временных рядов. Можно использовать с любым типом данных фиксированного размера. Реализует алгоритм, используемый в TSDB Gorilla, поддерживает 64-битные типы данных. Использует 1 дополнительный бит для 32-байтовых значений: 5-битные префиксы вместо 4-битных префиксов. Подробнее читайте в разделе «Compressing Time Stamps» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `GCD` - Вычисляет НОД всех чисел, а затем делит их на него. Этот кодек предназначен для подготовки данных и не подходит для использования без дополнительного кодека. GCD-кодек может использоваться с Integer, Decimal и DateTime. Хорошим вариантом использования было бы хранение временных меток или денежных значений с высокой точностью. -- `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. 
Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +- `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Эффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `T64` — Метод сжатия который обрезает неиспользуемые старшие биты целочисленных значений (включая `Enum`, `Date` и `DateTime`). На каждом шаге алгоритма, кодек помещает блок из 64 значений в матрицу 64✕64, транспонирует её, обрезает неиспользуемые биты, а то, что осталось возвращает в виде последовательности. Неиспользуемые биты, это биты, которые не изменяются от минимального к максимальному на всём диапазоне значений куска данных. Кодеки `DoubleDelta` и `Gorilla` используются в TSDB Gorilla как компоненты алгоритма сжатия. Подход Gorilla эффективен в сценариях, когда данные представляют собой медленно изменяющиеся во времени величины. Метки времени эффективно сжимаются кодеком `DoubleDelta`, а значения кодеком `Gorilla`. Например, чтобы создать эффективно хранящуюся таблицу, используйте следующую конфигурацию: diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 032bdc6e6d4..8fa30446bb3 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -60,7 +60,7 @@ AS SELECT ... Если указано `POPULATE`, то при создании представления в него будут добавлены данные, уже содержащиеся в исходной таблице, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Если `POPULATE` не указано, представление будет содержать только данные, добавленные в таблицу после создания представления. Использовать `POPULATE` не рекомендуется, так как в представление не попадут данные, добавляемые в таблицу во время создания представления. -Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. +Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`... Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. Выполнение запросов [ALTER](../../../sql-reference/statements/alter/view.md) над материализованными представлениями имеет свои особенности, поэтому эти запросы могут быть неудобными для использования. 
Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 747e36b8809..309d4852b11 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -73,7 +73,7 @@ INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1) ; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -Например, следующий формат запроса идентичен базовому варианту INSERT … VALUES: +Например, следующий формат запроса идентичен базовому варианту INSERT ... VALUES: ``` sql INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 5331cf00728..546a674d41a 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -116,7 +116,7 @@ SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UIn **Пример** -Запрос данных из файлов с именами `file000`, `file001`, … , `file999`: +Запрос данных из файлов с именами `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index fe40cb0c507..2847a95bf19 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -108,7 +108,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. ::: -Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index 7afcc07c6fb..c91d8bcf4d1 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -190,7 +190,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 如果在获取系统数据时发生了zookeeper异常。副本,将其显示在单独的列中。 这实现了 [#9137](https://github.com/ClickHouse/ClickHouse/issues/9137) [#9138](https://github.com/ClickHouse/ClickHouse/pull/9138) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 原子删除destroy上的MergeTree数据部分。 [#8402](https://github.com/ClickHouse/ClickHouse/pull/8402) ([Vladimir Chebotarev](https://github.com/excitoon)) - 支持分布式表的行级安全性。 [#8926](https://github.com/ClickHouse/ClickHouse/pull/8926) ([伊万](https://github.com/abyss7)) -- Now we recognize suffix (like KB, KiB…) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- Now we recognize suffix (like KB, KiB...) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 在构建大型连接的结果时防止内存不足。 [#8637](https://github.com/ClickHouse/ClickHouse/pull/8637) ([Artem Zuikov](https://github.com/4ertus2)) - 在交互模式下为建议添加群集名称 `clickhouse-client`. 
[#8709](https://github.com/ClickHouse/ClickHouse/pull/8709) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - Initialize query profiler for all threads in a group, e.g. it allows to fully profile insert-queries [#8820](https://github.com/ClickHouse/ClickHouse/pull/8820) ([伊万](https://github.com/abyss7)) @@ -523,7 +523,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 现在后台在磁盘之间移动,运行它的seprate线程池。 [#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon)) - `SYSTEM RELOAD DICTIONARY` 现在同步执行。 [#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) - 堆栈跟踪现在显示物理地址(对象文件中的偏移量),而不是虚拟内存地址(加载对象文件的位置)。 这允许使用 `addr2line` 当二进制独立于位置并且ASLR处于活动状态时。 这修复 [#8360](https://github.com/ClickHouse/ClickHouse/issues/8360). [#8387](https://github.com/ClickHouse/ClickHouse/pull/8387) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) -- 支持行级安全筛选器的新语法: `…
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) +- 支持行级安全筛选器的新语法: `...
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) - 现在 `cityHash` 功能可以与工作 `Decimal` 和 `UUID` 类型。 修复 [#5184](https://github.com/ClickHouse/ClickHouse/issues/5184). [#7693](https://github.com/ClickHouse/ClickHouse/pull/7693) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 从系统日志中删除了固定的索引粒度(它是1024),因为它在实现自适应粒度之后已经过时。 [#7698](https://github.com/ClickHouse/ClickHouse/pull/7698) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 当ClickHouse在没有SSL的情况下编译时,启用MySQL兼容服务器。 [#7852](https://github.com/ClickHouse/ClickHouse/pull/7852) ([尤里\*巴拉诺夫](https://github.com/yurriy)) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index c0a08291e02..724b22ad461 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -53,7 +53,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,…)和三元运算符 `?:` 周围添加空格。 +**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,...)和三元运算符 `?:` 周围添加空格。 ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -82,7 +82,7 @@ dst.ClickGoodEvent = click.GoodEvent; 如有必要,运算符可以包裹到下一行。 在这种情况下,它前面的偏移量增加。 -**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, …) 和参数。 +**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, ...) 和参数。 **12.** 在逗号后面加一个空格,而不是在之前。同样的规则也适合 `for` 循环中的分号。 @@ -111,7 +111,7 @@ public: **16.** 如果对整个文件使用相同的 `namespace`,并且没有其他重要的东西,则 `namespace` 中不需要偏移量。 -**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, … +**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, ... 但是如果内部 `statement` 包含大括号或 `else`,则外部块应该用大括号括起来。 @@ -262,7 +262,7 @@ void executeQuery( 这个示例来源于 http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/。 -**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期…)。 +**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期...)。 **8.** 单行注释用三个斜杆: `///` ,多行注释以 `/**`开始。 这些注释会当做文档。 diff --git a/docs/zh/engines/table-engines/integrations/hdfs.md b/docs/zh/engines/table-engines/integrations/hdfs.md index 55648afe407..be673b6ce92 100644 --- a/docs/zh/engines/table-engines/integrations/hdfs.md +++ b/docs/zh/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **示例** -创建具有名为文件的表 `file000`, `file001`, … , `file999`: +创建具有名为文件的表 `file000`, `file001`, ... , `file999`: ``` sql CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/zh/engines/table-engines/integrations/s3.md b/docs/zh/engines/table-engines/integrations/s3.md index f2585decabf..f18814675c3 100644 --- a/docs/zh/engines/table-engines/integrations/s3.md +++ b/docs/zh/engines/table-engines/integrations/s3.md @@ -109,7 +109,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https: **示例** -使用文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`来创建表: +使用文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`来创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); @@ -202,7 +202,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p !!! warning "Warning" 如果文件列表中包含有从0开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`. -4. 
从文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`创建表: +4. 从文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md index 4fecf4e5669..e283a4c7510 100644 --- a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -59,7 +59,7 @@ WHERE table = 'visits' └───────────┴────────────────┴────────┘ ``` -`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER … PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 +`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER ... PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 `name` 列为分区中数据片段的名称。在 [ALTER ATTACH PART](#alter_attach-partition) 语句中你可以使用此列值中来指定片段名称。 diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index bfa69338657..67bd681269b 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -702,7 +702,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - 插入(`INSERT`查询) - 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) - 从另一个副本下载 -- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 +- [ALTER TABLE ... FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: @@ -713,7 +713,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' 在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 -用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 +用户可以通过 [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 diff --git a/docs/zh/engines/table-engines/special/external-data.md b/docs/zh/engines/table-engines/special/external-data.md index 688e25402ab..06c6331b4f3 100644 --- a/docs/zh/engines/table-engines/special/external-data.md +++ b/docs/zh/engines/table-engines/special/external-data.md @@ -26,7 +26,7 @@ ClickHouse 允许向服务器发送处理查询所需的数据以及 SELECT 查 以下的参数是可选的:**–name** – 表的名称,如果省略,则采用 _data。 **–format** – 文件中的数据格式。 如果省略,则使用 TabSeparated。 -以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,… +以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,... 
**–structure**– 表结构的格式 `UserID UInt64`,`URL String`。定义列的名字以及类型。 在 «file» 中指定的文件将由 «format» 中指定的格式解析,使用在 «types» 或 «structure» 中指定的数据类型。该表将被上传到服务器,并在作为名称为 «name»临时表。 diff --git a/docs/zh/faq/general/olap.md b/docs/zh/faq/general/olap.md index b014419578b..c4b36b138fa 100644 --- a/docs/zh/faq/general/olap.md +++ b/docs/zh/faq/general/olap.md @@ -10,13 +10,13 @@ sidebar_position: 100 [OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward: Processing -: Some source data is processed… +: Some source data is processed... Analytical -: …to produce some analytical reports and insights… +: ...to produce some analytical reports and insights... Online -: …in real-time. +: ...in real-time. ## OLAP from the Business Perspective {#olap-from-the-business-perspective} diff --git a/docs/zh/getting-started/example-datasets/nyc-taxi.md b/docs/zh/getting-started/example-datasets/nyc-taxi.md index 9c487140df3..ceeb6fbb9e0 100644 --- a/docs/zh/getting-started/example-datasets/nyc-taxi.md +++ b/docs/zh/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (也可以直接使用`COPY ... TO PROGRAM`从Postgres中导入数据) -数据中所有与天气相关的字段(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 +数据中所有与天气相关的字段(precipitation...average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 首先,我们使用单台服务器创建表,后面我们将在多台节点上创建这些表。 diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index ecfdcddbbe2..7d4c299b919 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -212,7 +212,7 @@ ORDER BY year └──────┴─────────┴───────────────────────────────────────────────────────┘ ``` -2020 年房价出事了!但这并不令人意外…… +2020 年房价出事了!但这并不令人意外... ### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods} diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index e65cfea62cd..7e4fb6826e4 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -38,26 +38,6 @@ sudo service clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you've set up a password. ``` -
- -Deprecated Method for installing deb-packages - -``` bash -sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 - -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ - /etc/apt/sources.list.d/clickhouse.list -sudo apt-get update - -sudo apt-get install -y clickhouse-server clickhouse-client - -sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. -``` - -
- 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。 你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/deb/pool/stable)。 @@ -95,22 +75,6 @@ sudo /etc/init.d/clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you set up a password. ``` -
- -Deprecated Method for installing rpm-packages - -``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client - -sudo /etc/init.d/clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. -``` - -
- 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。 然后运行命令安装: @@ -164,34 +128,6 @@ tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \ sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` -
- -Deprecated Method for installing tgz archives - -``` bash -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ - grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh -``` - -
- 对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它,它以后缀`-stable`标志。 ### `Docker`安装包 {#from-docker-image} diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 758992e4084..975d5eb764c 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -371,7 +371,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID :::note - 最后一个索引条目(上图中的“mark 1082”)存储了上图中颗粒1082的主键列的最大值。 -- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411…”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411…”,这两个值来自不同的行。 +- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411...”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411...”,这两个值来自不同的行。 - 主索引文件完全加载到主内存中。如果文件大于可用的空闲内存空间,则ClickHouse将发生错误。 ::: diff --git a/docs/zh/index.md b/docs/zh/index.md index fab00dbcd1b..c092f296722 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -13,10 +13,10 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | |-----|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | 处于同一行中的数据总是被物理的存储在一起。 @@ -24,13 +24,13 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在列式数据库系统中,数据按如下的顺序存储: -| Row: | #0 | #1 | #2 | #N | +| Row: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | 这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。 diff --git a/docs/zh/operations/settings/query-complexity.md b/docs/zh/operations/settings/query-complexity.md index 124d5fa5d1a..b1b5ca75018 100644 --- a/docs/zh/operations/settings/query-complexity.md +++ b/docs/zh/operations/settings/query-complexity.md @@ -196,7 +196,7 @@ Restrictions on the «maximum amount of something» can take the value 0, which Limits the number of rows in the hash table that is used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. 
If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. @@ -213,7 +213,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index c3b4194ed44..5e59196f56c 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1002,7 +1002,7 @@ ClickHouse生成异常 ## count_distinct_implementation {#settings-count_distinct_implementation} -指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 +指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 可能的值: diff --git a/docs/zh/operations/system-tables/dictionaries.md b/docs/zh/operations/system-tables/dictionaries.md index 0cf91e45e86..c7b1bdd04be 100644 --- a/docs/zh/operations/system-tables/dictionaries.md +++ b/docs/zh/operations/system-tables/dictionaries.md @@ -21,7 +21,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. - `origin` ([字符串](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. - `type` ([字符串](../../sql-reference/data-types/string.md)) — Type of dictionary allocation. [在内存中存储字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). -- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. +- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, ..., type n)”. - `attribute.names` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Array of [属性名称](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 由字典提供。 - `attribute.types` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Corresponding array of [属性类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 这是由字典提供。 - `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. 
diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index cb1dcc35f5c..27d3375aebb 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -80,7 +80,7 @@ FROM 在这种情况下,您应该记住您不知道直方图bin边界。 -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} 检查序列是否包含与模式匹配的事件链。 @@ -167,7 +167,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} 计算与模式匹配的事件链的数量。该函数搜索不重叠的事件链。当前链匹配后,它开始搜索下一个链。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md index 4dce65af1ed..253eb9ef82d 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 **语法** ``` sql -quantiles(level1, level2, …)(x) +quantiles(level1, level2, ...)(x) ``` 所有分位数函数(quantile)也有相应的分位数(quantiles)函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`。 这些函数一次计算所列的级别的所有分位数, 并返回结果值的数组。 diff --git a/docs/zh/sql-reference/data-types/aggregatefunction.md b/docs/zh/sql-reference/data-types/aggregatefunction.md index e8f28b367a5..80648eb165b 100644 --- a/docs/zh/sql-reference/data-types/aggregatefunction.md +++ b/docs/zh/sql-reference/data-types/aggregatefunction.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/aggregatefunction --- -# AggregateFunction(name, types_of_arguments…) {#data-type-aggregatefunction} +# AggregateFunction(name, types_of_arguments...) {#data-type-aggregatefunction} 聚合函数的中间状态,可以通过聚合函数名称加`-State`后缀的形式得到它。与此同时,当您需要访问该类型的最终状态数据时,您需要以相同的聚合函数名加`-Merge`后缀的形式来得到最终状态数据。 diff --git a/docs/zh/sql-reference/data-types/domains/index.md b/docs/zh/sql-reference/data-types/domains/index.md index c123b10f6fe..9f12018732b 100644 --- a/docs/zh/sql-reference/data-types/domains/index.md +++ b/docs/zh/sql-reference/data-types/domains/index.md @@ -19,9 +19,9 @@ Domain类型是特定实现的类型,它总是与某个现存的基础类型 ### Domains的额外特性 {#domainsde-e-wai-te-xing} - 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 -- 在INSERT INTO domain_table(domain_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 +- 在INSERT INTO domain_table(domain_column) VALUES(...)中输入数据总是以更人性化的格式进行输入 - 在SELECT domain_column FROM domain_table中数据总是以更人性化的格式输出 -- 在INSERT INTO domain_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 +- 在INSERT INTO domain_table FORMAT CSV ...中,实现外部源数据以更人性化的格式载入 ### Domains类型的限制 {#domainslei-xing-de-xian-zhi} diff --git a/docs/zh/sql-reference/data-types/fixedstring.md b/docs/zh/sql-reference/data-types/fixedstring.md index 633307938a9..d454e935fe7 100644 --- a/docs/zh/sql-reference/data-types/fixedstring.md +++ b/docs/zh/sql-reference/data-types/fixedstring.md @@ -18,8 +18,8 @@ slug: /zh/sql-reference/data-types/fixedstring 可以有效存储在`FixedString`类型的列中的值的示例: - 二进制表示的IP地址(IPv6使用`FixedString(16)`) -- 语言代码(ru_RU, en_US … ) -- 货币代码(USD, RUB … ) +- 语言代码(ru_RU, en_US ... ) +- 货币代码(USD, RUB ... 
) - 二进制表示的哈希值(MD5使用`FixedString(16)`,SHA256使用`FixedString(32)`) 请使用[UUID](uuid.md)数据类型来存储UUID值,。 diff --git a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md index 5ef8256b483..57b30de0881 100644 --- a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/nested-data-structures/nested --- -# Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +# Nested(Name1 Type1, Name2 Type2, ...) {#nestedname1-type1-name2-type2} 嵌套数据结构类似于嵌套表。嵌套数据结构的参数(列名和类型)与 CREATE 查询类似。每个表可以包含任意多行嵌套数据结构。 diff --git a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md index 601cb602a78..fbaa76365ec 100644 --- a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md @@ -3,7 +3,7 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction --- # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -`SimpleAggregateFunction(name, types_of_arguments…)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 +`SimpleAggregateFunction(name, types_of_arguments...)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 支持以下聚合函数: diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index 004c80ff916..38813701c70 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/tuple --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} 元组,其中每个元素都有单独的 [类型](index.md#data_types)。 diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index d150b94b8af..69db34e4a36 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -152,7 +152,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), operator \[x1, ...\] {#arrayx1-operator-x1} 使用函数的参数作为数组元素创建一个数组。 参数必须是常量,并且具有最小公共类型的类型。必须至少传递一个参数,否则将不清楚要创建哪种类型的数组。也就是说,你不能使用这个函数来创建一个空数组(为此,使用上面描述的’emptyArray  \*’函数)。 @@ -337,7 +337,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) 设置为«NULL»的元素将作为普通的元素值处理。 -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) 
{#array-count} `func`将arr数组作为参数,其返回结果为非零值的数量。如果未指定“func”,则返回数组中非零元素的数量。 @@ -363,7 +363,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -返回 Array \[1, 2, 3, …, length (arr) \] +返回 Array \[1, 2, 3, ..., length (arr) \] 此功能通常与ARRAY JOIN一起使用。它允许在应用ARRAY JOIN后为每个数组计算一次。例如: @@ -403,7 +403,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) 此功能也可用于高阶函数。例如,您可以使用它来获取与条件匹配的元素的数组索引。 -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) {#arrayenumerateuniqarr} 返回与源数组大小相同的数组,其中每个元素表示与其下标对应的源数组元素在源数组中出现的次数。 例如:arrayEnumerateUniq( \[10,20,10,30 \])=  \[1,1,2,1 \]。 @@ -621,7 +621,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res 设置为«NULL»的数组元素作为普通的数组元素值处理。 -## arraySort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arraySort(\[func,\] arr, ...) {#array_functions-reverse-sort} 以升序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arraySort`函数也将解析与`func`函数参数相同数量的数组参数。更详细的示例在`arraySort`的末尾。 @@ -721,7 +721,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; !!! 注意 "注意" 为了提高排序效率, 使用了[施瓦茨变换](https://en.wikipedia.org/wiki/Schwartzian_transform)。 -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} 以降序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arrayReverseSort`函数也将解析与`func`函数参数相同数量的数组作为参数。更详细的示例在`arrayReverseSort`的末尾。 @@ -822,7 +822,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayUniq(arr, …) {#arrayuniqarr} +## arrayUniq(arr, ...) {#arrayuniqarr} 如果传递一个参数,则计算数组中不同元素的数量。 如果传递了多个参数,则它计算多个数组中相应位置的不同元素元组的数量。 @@ -1221,7 +1221,7 @@ select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} 将从 `func` 函数的原始应用中获得的数组返回给 `arr` 数组中的每个元素。 @@ -1251,7 +1251,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res 请注意,`arrayMap` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} 返回一个仅包含 `arr1` 中的元素的数组,其中 `func` 返回的值不是 0。 @@ -1284,7 +1284,7 @@ SELECT 请注意,`arrayFilter`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} 从第一个元素到最后一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i - 1]`替换`arr1[i]`。`arr1`的第一个元素不会被替换。 @@ -1302,7 +1302,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, 请注意,`arrayFill` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} 从最后一个元素到第一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i + 1]`替换`arr1[i]`。`arr1`的最后一个元素不会被替换。 @@ -1320,7 +1320,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 请注意,`arrayReverseFill`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) 
{#array-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的左侧拆分。数组不会在第一个元素之前被拆分。 @@ -1338,7 +1338,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arraySplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的右侧拆分。数组不会在最后一个元素之后被拆分。 @@ -1356,37 +1356,37 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arrayReverseSplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} 如果 `arr` 中至少有一个元素 `func` 返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayExists`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 如果 `func` 为 `arr` 中的所有元素返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayAll`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} 返回 `arr1` 数组中 `func` 返回非 0 的值的第一个元素。 请注意,`arrayFirst`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLast(func, arr1, …) {#array-last} +## arrayLast(func, arr1, ...) {#array-last} 返回 `arr1` 数组中的最后一个元素,其中 `func` 返回的值不是 0。 请注意,`arrayLast`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) {#array-first-index} 返回 `arr1` 数组中第一个元素的索引,其中 `func` 返回的值不是 0。 请注意,`arrayFirstIndex`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLastIndex(func, arr1, …) {#array-last-index} +## arrayLastIndex(func, arr1, ...) {#array-last-index} 返回 `arr1` 数组中最后一个元素的索引,其中 `func` 返回的值不是 0。 @@ -1612,7 +1612,7 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组中元素的部分和的数组(运行总和)。如果指定了 func 函数,则数组元素的值在求和之前由该函数转换。 diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index d6493ffe605..18b9f3495c0 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -443,7 +443,7 @@ SELECT toStartOfSecond(dt64, 'Asia/Istanbul'); `toISOWeek()`是一个兼容函数,等效于`toWeek(date,3)`。 下表描述了mode参数的工作方式。 -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... 
| |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | diff --git a/docs/zh/sql-reference/functions/higher-order-functions.md b/docs/zh/sql-reference/functions/higher-order-functions.md index 929dc6f3ea7..0e08f88bba1 100644 --- a/docs/zh/sql-reference/functions/higher-order-functions.md +++ b/docs/zh/sql-reference/functions/higher-order-functions.md @@ -15,13 +15,13 @@ slug: /zh/sql-reference/functions/higher-order-functions 除了’arrayMap’和’arrayFilter’以外的所有其他函数,都可以省略第一个参数(lambda函数)。在这种情况下,默认返回数组元素本身。 -### arrayMap(func, arr1, …) {#higher_order_functions-array-map} +### arrayMap(func, arr1, ...) {#higher_order_functions-array-map} 将arr 将从’func’函数的原始应用程序获得的数组返回到’arr’数组中的每个元素。 返回从原始应用程序获得的数组 ‘func’ 函数中的每个元素 ‘arr’ 阵列。 -### arrayFilter(func, arr1, …) {#arrayfilterfunc-arr1} +### arrayFilter(func, arr1, ...) {#arrayfilterfunc-arr1} 返回一个仅包含以下元素的数组 ‘arr1’ 对于哪个 ‘func’ 返回0以外的内容。 @@ -48,31 +48,31 @@ SELECT │ [2] │ └─────┘ -### arrayCount(\[func,\] arr1, …) {#arraycountfunc-arr1} +### arrayCount(\[func,\] arr1, ...) {#arraycountfunc-arr1} 返回数组arr中非零元素的数量,如果指定了’func’,则通过’func’的返回值确定元素是否为非零元素。 -### arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +### arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} 返回数组’arr’中是否存在非零元素,如果指定了’func’,则使用’func’的返回值确定元素是否为非零元素。 -### arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +### arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 返回数组’arr’中是否存在为零的元素,如果指定了’func’,则使用’func’的返回值确定元素是否为零元素。 -### arraySum(\[func,\] arr1, …) {#arraysumfunc-arr1} +### arraySum(\[func,\] arr1, ...) {#arraysumfunc-arr1} 计算arr数组的总和,如果指定了’func’,则通过’func’的返回值计算数组的总和。 -### arrayFirst(func, arr1, …) {#arrayfirstfunc-arr1} +### arrayFirst(func, arr1, ...) {#arrayfirstfunc-arr1} 返回数组中第一个匹配的元素,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayFirstIndex(func, arr1, …) {#arrayfirstindexfunc-arr1} +### arrayFirstIndex(func, arr1, ...) {#arrayfirstindexfunc-arr1} 返回数组中第一个匹配的元素的下标索引,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +### arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组部分数据的总和,如果指定了`func`函数,则使用`func`的返回值计算总和。 @@ -98,7 +98,7 @@ SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res │ [1,2,0,1] │ └───────────┘ -### arraySort(\[func,\] arr1, …) {#arraysortfunc-arr1} +### arraySort(\[func,\] arr1, ...) {#arraysortfunc-arr1} 返回升序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 @@ -124,7 +124,7 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL]) │ [1,2,3,4,nan,nan,NULL,NULL] │ └───────────────────────────────────────────────┘ -### arrayReverseSort(\[func,\] arr1, …) {#arrayreversesortfunc-arr1} +### arrayReverseSort(\[func,\] arr1, ...) {#arrayreversesortfunc-arr1} 返回降序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 diff --git a/docs/zh/sql-reference/functions/in-functions.md b/docs/zh/sql-reference/functions/in-functions.md index 346e076310e..9858159a495 100644 --- a/docs/zh/sql-reference/functions/in-functions.md +++ b/docs/zh/sql-reference/functions/in-functions.md @@ -10,10 +10,10 @@ sidebar_label: IN 运算符 请参阅[IN 运算符](../../sql-reference/operators/in.md#select-in-operators)部分。 -## tuple(x, y, …), 运算符 (x, y, …) {#tuplex-y-operator-x-y} +## tuple(x, y, ...), 运算符 (x, y, ...) 
{#tuplex-y-operator-x-y} 函数用于对多个列进行分组。 -对于具有类型T1,T2,…的列,它返回包含这些列的元组(T1,T2,…)。 执行该函数没有任何成本。 +对于具有类型T1,T2,...的列,它返回包含这些列的元组(T1,T2,...)。 执行该函数没有任何成本。 元组通常用作IN运算符的中间参数值,或用于创建lambda函数的形参列表。 元组不能写入表。 ## tupleElement(tuple, n), 运算符 x.N {#tupleelementtuple-n-operator-x-n} diff --git a/docs/zh/sql-reference/functions/json-functions.md b/docs/zh/sql-reference/functions/json-functions.md index 52ec0ed1535..f07de564847 100644 --- a/docs/zh/sql-reference/functions/json-functions.md +++ b/docs/zh/sql-reference/functions/json-functions.md @@ -56,7 +56,7 @@ slug: /zh/sql-reference/functions/json-functions 以下函数基于[simdjson](https://github.com/lemire/simdjson),专为更复杂的JSON解析要求而设计。但上述假设2仍然适用。 -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} 如果JSON中存在该值,则返回`1`。 @@ -83,7 +83,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' select JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} 返回JSON数组或JSON对象的长度。 @@ -94,7 +94,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) {#jsontypejson-indices-or-keys} 返回JSON值的类型。 @@ -106,13 +106,13 @@ slug: /zh/sql-reference/functions/json-functions select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} 解析JSON并提取值。这些函数类似于`visitParam*`函数。 @@ -124,7 +124,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200.0 select JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) 
{#jsonextractstringjson-indices-or-keys} 解析JSON并提取字符串。此函数类似于`visitParamExtractString`函数。 @@ -140,11 +140,11 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractString('{"abc":"\\u263"}', 'abc') = '' select JSONExtractString('{"abc":"hello}', 'abc') = '' -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} 解析JSON并提取给定ClickHouse数据类型的值。 -这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(…, ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(…, ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 +这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(..., ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(..., ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 示例: @@ -156,7 +156,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Thursday' SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} 从JSON中解析键值对,其中值是给定的ClickHouse数据类型。 @@ -164,7 +164,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8') = [('a',5),('b',7),('c',11)]; -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) {#jsonextractrawjson-indices-or-keys} 返回JSON的部分。 diff --git a/docs/zh/sql-reference/functions/null-functions.md b/docs/zh/sql-reference/functions/null-functions.md new file mode 100644 index 00000000000..c721bca458c --- /dev/null +++ b/docs/zh/sql-reference/functions/null-functions.md @@ -0,0 +1,254 @@ +--- +slug: /zh/sql-reference/functions/null-functions +--- +# Nullable处理函数 {#nullablechu-li-han-shu} + +## isNull {#isnull} + +检查参数是否为[NULL](../../sql-reference/syntax.md#null-literal)。 + + isNull(x) + +**参数** + +- `x` — 一个非复合数据类型的值。 + +**返回值** + +- `1` 如果`x`为`NULL`。 +- `0` 如果`x`不为`NULL`。 + +**示例** + +存在以下内容的表 + +```response +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` + +对其进行查询 + +```sql +SELECT x FROM t_null WHERE isNull(y) +``` + +```response +┌─x─┐ +│ 1 │ +└───┘ +``` + +## isNotNull {#isnotnull} + +检查参数是否不为 [NULL](../../sql-reference/syntax.md#null-literal). + + isNotNull(x) + +**参数:** + +- `x` — 一个非复合数据类型的值。 + +**返回值** + +- `0` 如果`x`为`NULL`。 +- `1` 如果`x`不为`NULL`。 + +**示例** + +存在以下内容的表 + +```response +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` + +对其进行查询 + +```sql +SELECT x FROM t_null WHERE isNotNull(y) +``` + +```response +┌─x─┐ +│ 2 │ +└───┘ +``` + +## 合并 {#coalesce} + +检查从左到右是否传递了«NULL»参数并返回第一个非`'NULL`参数。 + + coalesce(x,...) 
+ +**参数:** + +- 任何数量的非复合类型的参数。所有参数必须与数据类型兼容。 + +**返回值** + +- 第一个非’NULL\`参数。 +- `NULL`,如果所有参数都是’NULL\`。 + +**示例** + +考虑可以指定多种联系客户的方式的联系人列表。 + +```response +┌─name─────┬─mail─┬─phone─────┬──icq─┐ +│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ +│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└──────────┴──────┴───────────┴──────┘ +``` + +`mail`和`phone`字段是String类型,但`icq`字段是`UInt32`,所以它需要转换为`String`。 + +从联系人列表中获取客户的第一个可用联系方式: + +```sql +SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook +``` + +```response +┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ +│ client 1 │ 123-45-67 │ +│ client 2 │ ᴺᵁᴸᴸ │ +└──────────┴──────────────────────────────────────────────────────┘ +``` + +## ifNull {#ifnull} + +如果第一个参数为«NULL»,则返回第二个参数的值。 + + ifNull(x,alt) + +**参数:** + +- `x` — 要检查«NULL»的值。 +- `alt` — 如果`x`为’NULL\`,函数返回的值。 + +**返回值** + +- 价值 `x`,如果 `x` 不是 `NULL`. +- 价值 `alt`,如果 `x` 是 `NULL`. + +**示例** + + SELECT ifNull('a', 'b') + + ┌─ifNull('a', 'b')─┐ + │ a │ + └──────────────────┘ + + SELECT ifNull(NULL, 'b') + + ┌─ifNull(NULL, 'b')─┐ + │ b │ + └───────────────────┘ + +## nullIf {#nullif} + +如果参数相等,则返回`NULL`。 + + nullIf(x, y) + +**参数:** + +`x`, `y` — 用于比较的值。 它们必须是类型兼容的,否则将抛出异常。 + +**返回值** + +- 如果参数相等,则为`NULL`。 +- 如果参数不相等,则为`x`值。 + +**示例** + + SELECT nullIf(1, 1) + + ┌─nullIf(1, 1)─┐ + │ ᴺᵁᴸᴸ │ + └──────────────┘ + + SELECT nullIf(1, 2) + + ┌─nullIf(1, 2)─┐ + │ 1 │ + └──────────────┘ + +## assumeNotNull {#assumenotnull} + +将[可为空](../../sql-reference/functions/functions-for-nulls.md)类型的值转换为非`Nullable`类型的值。 + + assumeNotNull(x) + +**参数:** + +- `x` — 原始值。 + +**返回值** + +- 如果`x`不为`NULL`,返回非`Nullable`类型的原始值。 +- 如果`x`为`NULL`,则返回任意值。 + +**示例** + +存在如下`t_null`表。 + + SHOW CREATE TABLE t_null + + ┌─statement─────────────────────────────────────────────────────────────────┐ + │ CREATE TABLE default.t_null ( x Int8, y Nullable(Int8)) ENGINE = TinyLog │ + └───────────────────────────────────────────────────────────────────────────┘ + + ┌─x─┬────y─┐ + │ 1 │ ᴺᵁᴸᴸ │ + │ 2 │ 3 │ + └───┴──────┘ + +将列`y`作为`assumeNotNull`函数的参数。 + + SELECT assumeNotNull(y) FROM t_null + + ┌─assumeNotNull(y)─┐ + │ 0 │ + │ 3 │ + └──────────────────┘ + + SELECT toTypeName(assumeNotNull(y)) FROM t_null + + ┌─toTypeName(assumeNotNull(y))─┐ + │ Int8 │ + │ Int8 │ + └──────────────────────────────┘ + +## 可调整 {#tonullable} + +将参数的类型转换为`Nullable`。 + + toNullable(x) + +**参数:** + +- `x` — 任何非复合类型的值。 + +**返回值** + +- 输入的值,但其类型为`Nullable`。 + +**示例** + + SELECT toTypeName(10) + + ┌─toTypeName(10)─┐ + │ UInt8 │ + └────────────────┘ + + SELECT toTypeName(toNullable(10)) + + ┌─toTypeName(toNullable(10))─┐ + │ Nullable(UInt8) │ + └────────────────────────────┘ diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 2eeaad63694..9c28ff867c5 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -90,7 +90,7 @@ SELECT 'some-file-name' AS a, basename(a) 将一个常量列变为一个非常量列。 在ClickHouse中,非常量列和常量列在内存中的表示方式不同。尽管函数对于常量列和非常量总是返回相同的结果,但它们的工作方式可能完全不同(执行不同的代码)。此函数用于调试这种行为。 -## ignore(…) {#ignore} +## ignore(...) 
{#ignore} 接受任何参数,包括`NULL`。始终返回0。 但是,函数的参数总是被计算的。该函数可以用于基准测试。 diff --git a/docs/zh/sql-reference/functions/string-functions.md b/docs/zh/sql-reference/functions/string-functions.md index d1914839d7c..c28735c7dc7 100644 --- a/docs/zh/sql-reference/functions/string-functions.md +++ b/docs/zh/sql-reference/functions/string-functions.md @@ -95,7 +95,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') 以Unicode字符为单位反转UTF-8编码的字符串。如果字符串不是UTF-8编码,则可能获取到一个非预期的结果(不会抛出异常)。 -## format(pattern, s0, s1, …) {#formatpattern-s0-s1} +## format(pattern, s0, s1, ...) {#formatpattern-s0-s1} 使用常量字符串`pattern`格式化其他参数。`pattern`字符串中包含由大括号`{}`包围的«替换字段»。 未被包含在大括号中的任何内容都被视为文本内容,它将原样保留在返回值中。 如果你需要在文本内容中包含一个大括号字符,它可以通过加倍来转义:`{{ '{{' }}`和`{{ '{{' }} '}}' }}`。 字段名称可以是数字(从零开始)或空(然后将它们视为连续数字) @@ -113,11 +113,11 @@ SELECT format('{} {}', 'Hello', 'World') └───────────────────────────────────┘ ``` -## concat(s1, s2, …) {#concat-s1-s2} +## concat(s1, s2, ...) {#concat-s1-s2} 将参数中的多个字符串拼接,不带分隔符。 -## concatAssumeInjective(s1, s2, …) {#concatassumeinjectives1-s2} +## concatAssumeInjective(s1, s2, ...) {#concatassumeinjectives1-s2} 与[concat](#concat-s1-s2)相同,区别在于,你需要保证concat(s1, s2, s3) -\> s4是单射的,它将用于GROUP BY的优化。 diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md index 972fd84e2a1..8ada76eeeda 100644 --- a/docs/zh/sql-reference/functions/string-search-functions.md +++ b/docs/zh/sql-reference/functions/string-search-functions.md @@ -204,7 +204,7 @@ SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']); **语法** ```sql -multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) +multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) ``` ## multiSearchFirstIndex @@ -216,7 +216,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) **语法** ```sql -multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) ``` ## multiSearchAny {#multisearchany} @@ -229,7 +229,7 @@ multiSearchFirstIndex(haystack, \[needle1, needle2, …, n **语法** ```sql -multiSearchAny(haystack, [needle1, needle2, …, needleN]) +multiSearchAny(haystack, [needle1, needle2, ..., needleN]) ``` ## match {#match} @@ -273,7 +273,7 @@ Hyperscan 通常容易受到正则表达式拒绝服务 (ReDoS) 攻击。有关 **语法** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -283,7 +283,7 @@ multiMatchAny(haystack, \[pattern1, pattern2, …, pattern **语法** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -293,7 +293,7 @@ multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, pa **语法** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -307,7 +307,7 @@ multiMatchAllIndices(haystack, \[pattern1, pattern2, …, **语法** ```sql -multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAnyIndex @@ -317,7 +317,7 @@ multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern21, pattern2, …, patternn\]) +multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAllIndices @@ -327,7 +327,7 @@ 
multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2 **语法** ```sql -multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## extract diff --git a/docs/zh/sql-reference/functions/url-functions.md b/docs/zh/sql-reference/functions/url-functions.md index 44880b6ca1a..e7a0354c0bf 100644 --- a/docs/zh/sql-reference/functions/url-functions.md +++ b/docs/zh/sql-reference/functions/url-functions.md @@ -11,7 +11,7 @@ slug: /zh/sql-reference/functions/url-functions ### 协议 {#protocol} -返回URL的协议。例如: http、ftp、mailto、magnet… +返回URL的协议。例如: http、ftp、mailto、magnet... ### 域 {#domain} diff --git a/docs/zh/sql-reference/statements/alter/delete.md b/docs/zh/sql-reference/statements/alter/delete.md index 5eb77c35a93..f0b41c4e214 100644 --- a/docs/zh/sql-reference/statements/alter/delete.md +++ b/docs/zh/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE 语句 {#alter-mutations} +# ALTER TABLE ... DELETE 语句 {#alter-mutations} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/zh/sql-reference/statements/alter/index.md b/docs/zh/sql-reference/statements/alter/index.md index e173837a16c..2286dcccd13 100644 --- a/docs/zh/sql-reference/statements/alter/index.md +++ b/docs/zh/sql-reference/statements/alter/index.md @@ -38,7 +38,7 @@ sidebar_label: ALTER ## Mutations 突变 {#mutations} -用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。 +用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。 diff --git a/docs/zh/sql-reference/statements/alter/update.md b/docs/zh/sql-reference/statements/alter/update.md index 97b2b43d889..7cf37401dc5 100644 --- a/docs/zh/sql-reference/statements/alter/update.md +++ b/docs/zh/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE 语句 {#alter-table-update-statements} +# ALTER TABLE ... UPDATE 语句 {#alter-table-update-statements} ``` sql ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr diff --git a/docs/zh/sql-reference/statements/alter/view.md b/docs/zh/sql-reference/statements/alter/view.md index 34a612803c1..a19d918612a 100644 --- a/docs/zh/sql-reference/statements/alter/view.md +++ b/docs/zh/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# ALTER TABLE … MODIFY QUERY 语句 {#alter-modify-query} +# ALTER TABLE ... MODIFY QUERY 语句 {#alter-modify-query} -当使用`ALTER TABLE … MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 +当使用`ALTER TABLE ... 
MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 如果一个物化视图使用`TO [db.]name`,你必须先 [DETACH](../detach.mdx) 视图。用[ALTER TABLE](index.md)修改目标表,然后 [ATTACH](../attach.mdx)之前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index bce0994ecd2..49a1d66bdf1 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -55,7 +55,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 如果指定`POPULATE`,则在创建视图时将现有表数据插入到视图中,就像创建一个`CREATE TABLE ... AS SELECT ...`一样。 否则,查询仅包含创建视图后插入表中的数据。 我们**不建议**使用POPULATE,因为在创建视图期间插入表中的数据不会插入其中。 -`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`……请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 +`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`...请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 在物化视图上执行[ALTER](../../../sql-reference/statements/alter/index.md)查询有局限性,因此可能不方便。 如果物化视图使用构造`TO [db.]name`,你可以`DETACH`视图,为目标表运行`ALTER`,然后`ATTACH`先前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index f80c0a8a8ea..a08a78b6f1d 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -68,7 +68,7 @@ SELECT * FROM insert_select_testtable; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: +例如,下面的查询所使用的输入格式就与上面INSERT ... VALUES的中使用的输入格式相同: ``` sql INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/zh/sql-reference/statements/select/limit.md b/docs/zh/sql-reference/statements/select/limit.md index 2bbf2949707..795f3f4ecd1 100644 --- a/docs/zh/sql-reference/statements/select/limit.md +++ b/docs/zh/sql-reference/statements/select/limit.md @@ -13,11 +13,11 @@ sidebar_label: LIMIT 如果没有 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句显式排序结果,结果的行选择可能是任意的和非确定性的。 -## LIMIT … WITH TIES 修饰符 {#limit-with-ties} +## LIMIT ... WITH TIES 修饰符 {#limit-with-ties} 如果为 `LIMIT n[,m]` 设置了 `WITH TIES` ,并且声明了 `ORDER BY expr_list`, 除了得到无修饰符的结果(正常情况下的 `limit n`, 前n行数据), 还会返回与第`n`行具有相同排序字段的行(即如果第n+1行的字段与第n行 拥有相同的排序字段,同样返回该结果. -此修饰符可以与: [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. +此修饰符可以与: [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. 例如以下查询: diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 3286fc9f9e7..2f2d9a4959c 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -89,7 +89,7 @@ SELECT a, b, c FROM t ORDER BY a, b, c ## ORDER BY Expr WITH FILL Modifier {#orderby-with-fill} -此修饰符可以与 [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. +此修饰符可以与 [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. 
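例如,下面是一个简化的示意查询(仅作演示,`FROM`/`TO`/`STEP` 参数的含义见下文说明):

```sql
SELECT number AS n
FROM numbers(5)
WHERE number % 2 = 1
ORDER BY n WITH FILL FROM 0 TO 6 STEP 1
```

查询结果中缺失的 `n` 值(0、2、4、5)将按步长顺序补齐,其他列填充为默认值。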
可以在`ORDER BY expr`之后用可选的`FROM expr`,`TO expr`和`STEP expr`参数来设置`WITH FILL`修饰符。 所有`expr`列的缺失值将被顺序填充,而其他列将被填充为默认值。 diff --git a/docs/zh/sql-reference/table-functions/file.md b/docs/zh/sql-reference/table-functions/file.md index 28682255738..fa1ec12f7df 100644 --- a/docs/zh/sql-reference/table-functions/file.md +++ b/docs/zh/sql-reference/table-functions/file.md @@ -114,7 +114,7 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... , `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/hdfs.md b/docs/zh/sql-reference/table-functions/hdfs.md index b10b10ae2d2..f8320d8d0bb 100644 --- a/docs/zh/sql-reference/table-functions/hdfs.md +++ b/docs/zh/sql-reference/table-functions/hdfs.md @@ -84,7 +84,7 @@ FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value U **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... , `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index f7384a7526e..4f2c7299d95 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -99,7 +99,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi !!! warning "Warning" 如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 -计算名为 `file-000.csv`, `file-001.csv`, … , `file-999.csv` 文件的总行数: +计算名为 `file-000.csv`, `file-001.csv`, ... , `file-999.csv` 文件的总行数: ``` sql SELECT count(*) diff --git a/packages/clickhouse-server.init b/packages/clickhouse-server.init index f215e52b6f3..0ac9cf7ae1f 100755 --- a/packages/clickhouse-server.init +++ b/packages/clickhouse-server.init @@ -1,10 +1,11 @@ #!/bin/sh ### BEGIN INIT INFO # Provides: clickhouse-server +# Required-Start: $network +# Required-Stop: $network +# Should-Start: $time # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 -# Should-Start: $time $network -# Should-Stop: $network # Short-Description: clickhouse-server daemon ### END INIT INFO # diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 0d91de2dad8..4640882f2be 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -162,7 +162,7 @@ if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID) set (HARMFUL_LIB harmful) endif () -target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB}) +target_link_libraries (clickhouse PRIVATE clickhouse_common_io ${HARMFUL_LIB}) target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if (ENABLE_CLICKHOUSE_KEEPER) diff --git a/programs/bash-completion/completions/clickhouse-bootstrap b/programs/bash-completion/completions/clickhouse-bootstrap index 2862140b528..73e2ef07477 100644 --- a/programs/bash-completion/completions/clickhouse-bootstrap +++ b/programs/bash-completion/completions/clickhouse-bootstrap @@ -154,7 +154,8 @@ function _clickhouse_quote() # Extract every option (everything that starts with "-") from the --help dialog. 
function _clickhouse_get_options() { - "$@" --help 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u + # By default --help will not print all settings, this is done only under --verbose + "$@" --help --verbose 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u } function _complete_for_clickhouse_generic_bin_impl() diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index eecc352d073..48dca82eb2b 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -80,6 +82,7 @@ public: double max_time_, size_t confidence_, const String & query_id_, + const String & query_id_prefix_, const String & query_to_execute_, size_t max_consecutive_errors_, bool continue_on_errors_, @@ -98,6 +101,7 @@ public: max_time(max_time_), confidence(confidence_), query_id(query_id_), + query_id_prefix(query_id_prefix_), query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), max_consecutive_errors(max_consecutive_errors_), @@ -205,6 +209,7 @@ private: double max_time; size_t confidence; String query_id; + String query_id_prefix; String query_to_execute; bool continue_on_errors; size_t max_consecutive_errors; @@ -463,8 +468,11 @@ private: RemoteQueryExecutor executor( *entry, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); + if (!query_id.empty()) executor.setQueryId(query_id); + else if (!query_id_prefix.empty()) + executor.setQueryId(query_id_prefix + "_" + Poco::UUIDGenerator().createRandom().toString()); Progress progress; executor.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); @@ -617,6 +625,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("stacktrace", "print stack traces of exceptions") ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value()->default_value(""), "") + ("query_id_prefix", value()->default_value(""), "") ("max-consecutive-errors", value()->default_value(0), "set number of allowed consecutive errors") ("ignore-error,continue_on_errors", "continue testing even if a query fails") ("reconnect", "establish new connection for every query") @@ -671,6 +680,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["timelimit"].as(), options["confidence"].as(), options["query_id"].as(), + options["query_id_prefix"].as(), options["query"].as(), options["max-consecutive-errors"].as(), options.count("ignore-error"), diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index e160355ef7b..f8ef8ccaf65 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -10,7 +10,6 @@ set (CLICKHOUSE_CLIENT_LINK clickhouse_common_io clickhouse_functions clickhouse_parsers - string_utils ) if (TARGET ch_rust::skim) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 396cd3e646b..efe23d57478 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -263,7 +263,7 @@ void Client::initialize(Poco::Util::Application & self) config().add(loaded_config.configuration); } else if (config().has("connection")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not exists"); + 
throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not exist"); /** getenv is thread-safe in Linux glibc and in all sane libc implementations. * But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer. @@ -1178,7 +1178,7 @@ void Client::processConfig() pager = config().getString("pager", ""); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); global_context->setQueryKindInitial(); diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index ea84cd0682d..7213802ea86 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -84,7 +84,7 @@ private: for (const auto & file_name : file_names) { - auto path = relative_path + "/" + file_name; + auto path = relative_path.empty() ? file_name : (relative_path + "/" + file_name); if (disk->isDirectory(path)) listRecursive(disk, path); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 6c768799221..5da5ab4bae9 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -97,7 +97,8 @@ void DisksApp::processOptions() DisksApp::~DisksApp() { - global_context->shutdown(); + if (global_context) + global_context->shutdown(); } void DisksApp::init(std::vector & common_arguments) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index d4b975ce1e8..1b91e7ceaf3 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index fdabeacd46e..5430c4b0a42 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -233,7 +233,7 @@ struct Commit }; -enum class FileChangeType +enum class FileChangeType : uint8_t { Add, Delete, @@ -291,7 +291,7 @@ struct FileChange }; -enum class LineType +enum class LineType : uint8_t { Empty, Comment, diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 6bed114238a..d6576927a20 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -323,7 +323,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) { fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n", main_bin_path.string(), points_to.string(), binary_self_canonical_path.string()); - fs::remove(main_bin_path); + (void)fs::remove(main_bin_path); } } } @@ -489,7 +489,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) { fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n", symlink_path.string(), points_to.string(), main_bin_path.string()); - fs::remove(symlink_path); + (void)fs::remove(symlink_path); } } } @@ -1006,7 +1006,7 @@ namespace else { fmt::print("{} file exists but damaged, ignoring.\n", pid_file.string()); - fs::remove(pid_file); + (void)fs::remove(pid_file); } } else @@ -1014,7 +1014,7 @@ namespace /// Create a directory for pid file. /// It's created by "install" but we also support cases when ClickHouse is already installed different way. 
fs::path pid_path = pid_file; - pid_path.remove_filename(); + pid_path = pid_path.remove_filename(); fs::create_directories(pid_path); /// All users are allowed to read pid file (for clickhouse status command). fs::permissions(pid_path, fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read, fs::perm_options::replace); @@ -1098,7 +1098,7 @@ namespace else { fmt::print("{} file exists but damaged, ignoring.\n", pid_file.string()); - fs::remove(pid_file); + (void)fs::remove(pid_file); } } catch (const Exception & e) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 2ec43ae15d0..df9da8e9613 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String path; @@ -58,7 +63,7 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con auto new_path = client->getAbsolutePath(query->args[0].safeGet()); if (!client->zookeeper->exists(new_path)) - std::cerr << "Path " << new_path << " does not exists\n"; + std::cerr << "Path " << new_path << " does not exist\n"; else client->cwd = new_path; } @@ -208,6 +213,143 @@ void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client std::cout << "numChildren = " << stat.numChildren << "\n"; } +namespace +{ + +/// Helper class for parallelized tree traversal +template +struct TraversalTask : public std::enable_shared_from_this> +{ + using TraversalTaskPtr = std::shared_ptr>; + + struct Ctx + { + std::deque new_tasks; /// Tasks for newly discovered children, that hasn't been started yet + std::deque> in_flight_list_requests; /// In-flight getChildren requests + std::deque> finish_callbacks; /// Callbacks to be called + KeeperClient * client; + UserCtx & user_ctx; + + Ctx(KeeperClient * client_, UserCtx & user_ctx_) : client(client_), user_ctx(user_ctx_) {} + }; + +private: + const fs::path path; + const TraversalTaskPtr parent; + + Int64 child_tasks = 0; + Int64 nodes_in_subtree = 1; + +public: + TraversalTask(const fs::path & path_, TraversalTaskPtr parent_) + : path(path_) + , parent(parent_) + { + } + + /// Start traversing the subtree + void onStart(Ctx & ctx) + { + /// tryGetChildren doesn't throw if the node is not found (was deleted in the meantime) + std::shared_ptr> list_request = + std::make_shared>(ctx.client->zookeeper->asyncTryGetChildren(path)); + ctx.in_flight_list_requests.push_back([task = this->shared_from_this(), list_request](Ctx & ctx_) mutable + { + task->onGetChildren(ctx_, list_request->get()); + }); + } + + /// Called when getChildren request returns + void onGetChildren(Ctx & ctx, const Coordination::ListResponse & response) + { + const bool traverse_children = ctx.user_ctx.onListChildren(path, response.names); + + if (traverse_children) + { + /// Schedule traversal of each child + for (const auto & child : response.names) + { + auto task = std::make_shared(path / child, this->shared_from_this()); + ctx.new_tasks.push_back(task); + } + child_tasks = response.names.size(); + } + + if (child_tasks == 0) + finish(ctx); + } + + /// Called when a child subtree has been traversed + void onChildTraversalFinished(Ctx & ctx, Int64 child_nodes_in_subtree) + { + nodes_in_subtree += child_nodes_in_subtree; + + --child_tasks; + + /// Finish if all children have been traversed + if (child_tasks == 0) + finish(ctx); + } + 
+private: + /// This node and all its children have been traversed + void finish(Ctx & ctx) + { + ctx.user_ctx.onFinishChildrenTraversal(path, nodes_in_subtree); + + if (!parent) + return; + + /// Notify the parent that we have finished traversing the subtree + ctx.finish_callbacks.push_back([p = this->parent, child_nodes_in_subtree = this->nodes_in_subtree](Ctx & ctx_) + { + p->onChildTraversalFinished(ctx_, child_nodes_in_subtree); + }); + } +}; + +/// Traverses the tree in parallel and calls user callbacks +/// Parallelization is achieved by sending multiple async getChildren requests to Keeper, but all processing is done in a single thread +template +void parallelized_traverse(const fs::path & path, KeeperClient * client, size_t max_in_flight_requests, UserCtx & ctx_) +{ + typename TraversalTask::Ctx ctx(client, ctx_); + + auto root_task = std::make_shared>(path, nullptr); + + ctx.new_tasks.push_back(root_task); + + /// Until there is something to do + while (!ctx.new_tasks.empty() || !ctx.in_flight_list_requests.empty() || !ctx.finish_callbacks.empty()) + { + /// First process all finish callbacks, they don't wait for anything and allow to free memory + while (!ctx.finish_callbacks.empty()) + { + auto callback = std::move(ctx.finish_callbacks.front()); + ctx.finish_callbacks.pop_front(); + callback(ctx); + } + + /// Make new requests if there are less than max in flight + while (!ctx.new_tasks.empty() && ctx.in_flight_list_requests.size() < max_in_flight_requests) + { + auto task = std::move(ctx.new_tasks.front()); + ctx.new_tasks.pop_front(); + task->onStart(ctx); + } + + /// Wait for first request in the queue to finish + if (!ctx.in_flight_list_requests.empty()) + { + auto request = std::move(ctx.in_flight_list_requests.front()); + ctx.in_flight_list_requests.pop_front(); + request(ctx); + } + } +} + +} /// anonymous namespace + bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { ASTPtr threshold; @@ -216,6 +358,8 @@ bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr & node->args.push_back(threshold->as().value); + ParserToken{TokenType::Whitespace}.ignore(pos); + String path; if (!parseKeeperPath(pos, expected, path)) path = "."; @@ -229,23 +373,21 @@ void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client auto threshold = query->args[0].safeGet(); auto path = client->getAbsolutePath(query->args[1].safeGet()); - Coordination::Stat stat; - client->zookeeper->get(path, &stat); - - if (stat.numChildren >= static_cast(threshold)) + struct { - std::cout << static_cast(path) << "\t" << stat.numChildren << "\n"; - return; - } + bool onListChildren(const fs::path & path, const Strings & children) const + { + if (children.size() >= threshold) + std::cout << static_cast(path) << "\t" << children.size() << "\n"; + return true; + } - auto children = client->zookeeper->getChildren(path); - std::sort(children.begin(), children.end()); - for (const auto & child : children) - { - auto next_query = *query; - next_query.args[1] = DB::Field(path / child); - execute(&next_query, client); - } + void onFinishChildrenTraversal(const fs::path &, Int64) const {} + + size_t threshold; + } ctx {.threshold = threshold }; + + parallelized_traverse(path, client, /* max_in_flight_requests */ 50, ctx); } bool DeleteStaleBackups::parse(IParser::Pos & /* pos */, std::shared_ptr & /* node */, Expected & /* expected */) const @@ -315,30 +457,23 @@ void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) auto 
path = client->getAbsolutePath(query->args[0].safeGet()); auto n = query->args[1].safeGet(); - std::vector> result; - - std::queue queue; - queue.push(path); - while (!queue.empty()) + struct { - auto next_path = queue.front(); - queue.pop(); + std::vector> result; - auto children = client->zookeeper->getChildren(next_path); - for (auto & child : children) - child = next_path / child; - auto response = client->zookeeper->get(children); + bool onListChildren(const fs::path &, const Strings &) const { return true; } - for (size_t i = 0; i < response.size(); ++i) + void onFinishChildrenTraversal(const fs::path & path, Int64 nodes_in_subtree) { - result.emplace_back(response[i].stat.numChildren, children[i]); - queue.push(children[i]); + result.emplace_back(nodes_in_subtree, path.string()); } - } + } ctx; - std::sort(result.begin(), result.end(), std::greater()); - for (UInt64 i = 0; i < std::min(result.size(), static_cast(n)); ++i) - std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n"; + parallelized_traverse(path, client, /* max_in_flight_requests */ 50, ctx); + + std::sort(ctx.result.begin(), ctx.result.end(), std::greater()); + for (UInt64 i = 0; i < std::min(ctx.result.size(), static_cast(n)); ++i) + std::cout << std::get<1>(ctx.result[i]) << "\t" << std::get<0>(ctx.result[i]) << "\n"; } bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -427,7 +562,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient new_members = query->args[1].safeGet(); break; default: - UNREACHABLE(); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected operation: {}", operation); } auto response = client->zookeeper->reconfig(joining, leaving, new_members); diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 52d825f30e6..ebec337060c 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -86,7 +86,10 @@ std::vector KeeperClient::getCompletions(const String & prefix) const void KeeperClient::askConfirmation(const String & prompt, std::function && callback) { if (!ask_confirmation) - return callback(); + { + callback(); + return; + } std::cout << prompt << " Continue?\n"; waiting_confirmation = true; diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 4cfea38c48e..5f2787f8930 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -58,7 +58,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) DB::KeeperSnapshotManager manager(1, keeper_context); auto snp = manager.serializeSnapshotToBuffer(snapshot); auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); - std::cout << "Snapshot serialized to path:" << fs::path(file_info.disk->getPath()) / file_info.path << std::endl; + std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl; } catch (...) 
{ diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 70578601f4c..22874e199be 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -9,8 +9,6 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_common_zookeeper daemon dbms - - ${LINK_RESOURCE_LIB} ) clickhouse_program_add(keeper) @@ -148,6 +146,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -210,8 +209,6 @@ if (BUILD_STANDALONE_KEEPER) loggers_no_text_log clickhouse_common_io clickhouse_parsers # Otherwise compression will not built. FIXME. - - ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a558ed64bf9..dba5c2b7d2a 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -182,6 +182,11 @@ std::string Keeper::getDefaultConfigFileName() const return "keeper_config.xml"; } +bool Keeper::allowTextLog() const +{ + return false; +} + void Keeper::handleCustomArguments(const std::string & arg, [[maybe_unused]] const std::string & value) // NOLINT { if (arg == "force-recovery") @@ -249,11 +254,6 @@ struct KeeperHTTPContext : public IHTTPContext return context->getConfigRef().getUInt64("keeper_server.http_max_field_value_size", 128 * 1024); } - uint64_t getMaxChunkSize() const override - { - return context->getConfigRef().getUInt64("keeper_server.http_max_chunk_size", 100_GiB); - } - Poco::Timespan getReceiveTimeout() const override { return {context->getConfigRef().getInt64("keeper_server.http_receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0}; diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index f889ffa595b..c449c40b610 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -65,6 +65,8 @@ protected: std::string getDefaultConfigFileName() const override; + bool allowTextLog() const override; + private: Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp index be2686d936b..f2f91930ac0 100644 --- a/programs/keeper/clickhouse-keeper.cpp +++ b/programs/keeper/clickhouse-keeper.cpp @@ -1,4 +1,4 @@ -#include +#include #include "config_tools.h" diff --git a/programs/library-bridge/ExternalDictionaryLibraryUtils.h b/programs/library-bridge/ExternalDictionaryLibraryUtils.h index e6bf8f2a4c3..2eb44022742 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryUtils.h +++ b/programs/library-bridge/ExternalDictionaryLibraryUtils.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index 26d887cfc98..8d116e537aa 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -284,7 +284,6 @@ void 
ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ else if (method == "extDict_loadIds") { LOG_DEBUG(log, "Getting diciontary ids for dictionary with id: {}", dictionary_id); - String ids_string; std::vector ids = parseIdsFromBinary(request.getStream()); auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id); diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 1db71eb24cb..62fbf2caede 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -23,7 +23,7 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - static constexpr inline auto FORMAT = "RowBinary"; + static constexpr auto FORMAT = "RowBinary"; const size_t keep_alive_timeout; LoggerPtr log; diff --git a/programs/library-bridge/SharedLibrary.cpp b/programs/library-bridge/SharedLibrary.cpp index d70709474b5..7423f9b89f4 100644 --- a/programs/library-bridge/SharedLibrary.cpp +++ b/programs/library-bridge/SharedLibrary.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes SharedLibrary::SharedLibrary(std::string_view path, int flags) { - handle = dlopen(path.data(), flags); + handle = dlopen(path.data(), flags); // NOLINT if (!handle) throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror @@ -34,7 +34,7 @@ void * SharedLibrary::getImpl(std::string_view name, bool no_throw) { dlerror(); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror - auto * res = dlsym(handle, name.data()); + auto * res = dlsym(handle, name.data()); // NOLINT if (char * error = dlerror()) // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 5f2a51406e1..4d5cfb09e6a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -161,6 +160,14 @@ void LocalServer::initialize(Poco::Util::Application & self) getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); + const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32); + getUnexpectedPartsLoadingThreadPool().initialize( + unexpected_parts_loading_threads, + 0, // We don't need any threads one all the parts will be loaded + unexpected_parts_loading_threads); + + getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); + const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128); getPartsCleaningThreadPool().initialize( cleanup_threads, @@ -211,7 +218,7 @@ void LocalServer::tryInitPath() else { // The path is not provided explicitly - use a unique path in the system temporary directory - // (or in the current dir if temporary don't exist) + // (or in the current dir if a temporary doesn't exist) LoggerRawPtr log = &logger(); std::filesystem::path parent_folder; std::filesystem::path default_path; @@ -237,39 +244,30 @@ void LocalServer::tryInitPath() /// as we can't accurately distinguish those situations we don't touch any 
existent folders /// we just try to pick some free name for our working folder - default_path = parent_folder / fmt::format("clickhouse-local-{}-{}-{}", getpid(), time(nullptr), randomSeed()); + default_path = parent_folder / fmt::format("clickhouse-local-{}", UUIDHelpers::generateV4()); - if (exists(default_path)) - throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Unsuccessful attempt to create working directory: {} already exists.", default_path.string()); + if (fs::exists(default_path)) + throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Unsuccessful attempt to set up the working directory: {} already exists.", default_path.string()); - create_directory(default_path); + /// The directory can be created lazily during the runtime. temporary_directory_to_delete = default_path; path = default_path.string(); LOG_DEBUG(log, "Working directory created: {}", path); } - if (path.back() != '/') - path += '/'; + global_context->setPath(fs::path(path) / ""); - fs::create_directories(fs::path(path) / "user_defined/"); - fs::create_directories(fs::path(path) / "data/"); - fs::create_directories(fs::path(path) / "metadata/"); - fs::create_directories(fs::path(path) / "metadata_dropped/"); + global_context->setTemporaryStoragePath(fs::path(path) / "tmp" / "", 0); + global_context->setFlagsPath(fs::path(path) / "flags" / ""); - global_context->setPath(path); - - global_context->setTemporaryStoragePath(path + "tmp/", 0); - global_context->setFlagsPath(path + "flags"); - - global_context->setUserFilesPath(""); // user's files are everywhere + global_context->setUserFilesPath(""); /// user's files are everywhere std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/"); global_context->setUserScriptsPath(user_scripts_path); - fs::create_directories(user_scripts_path); /// top_level_domains_lists - const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/"); + const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/"); if (!top_level_domains_path.empty()) TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config()); } @@ -299,10 +297,9 @@ void LocalServer::cleanup() // Delete the temporary directory if needed. if (temporary_directory_to_delete) { - const auto dir = *temporary_directory_to_delete; + LOG_DEBUG(&logger(), "Removing temporary directory: {}", temporary_directory_to_delete->string()); + fs::remove_all(*temporary_directory_to_delete); temporary_directory_to_delete.reset(); - LOG_DEBUG(&logger(), "Removing temporary directory: {}", dir.string()); - remove_all(dir); } } catch (...) @@ -481,6 +478,9 @@ try registerFormats(); processConfig(); + + SCOPE_EXIT({ cleanup(); }); + initTTYBuffer(toProgressOption(config().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); @@ -530,16 +530,12 @@ try } catch (const DB::Exception & e) { - cleanup(); - bool need_print_stack_trace = config().getBool("stacktrace", false); std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl; return e.code() ? e.code() : -1; } catch (...) 
{ - cleanup(); - std::cerr << getCurrentExceptionMessage(false) << std::endl; return getCurrentExceptionCode(); } @@ -611,7 +607,7 @@ void LocalServer::processConfig() if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); @@ -705,6 +701,7 @@ void LocalServer::processConfig() if (config().has("path")) { String path = global_context->getPath(); + fs::create_directories(fs::path(path)); /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); diff --git a/programs/main.cpp b/programs/main.cpp index 9ad8b016c82..c270388f17f 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -15,7 +15,7 @@ #include "config_tools.h" -#include +#include #include #include @@ -119,7 +119,7 @@ std::pair clickhouse_short_names[] = }; -enum class InstructionFail +enum class InstructionFail : uint8_t { NONE = 0, SSE3 = 1, @@ -155,8 +155,8 @@ auto instructionFailToString(InstructionFail fail) ret("AVX2"); case InstructionFail::AVX512: ret("AVX512"); +#undef ret } - UNREACHABLE(); } @@ -491,9 +491,13 @@ int main(int argc_, char ** argv_) /// clickhouse -q 'select 1' # use local /// clickhouse # spawn local /// clickhouse local # spawn local + /// clickhouse "select ..." # spawn local /// - if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-')) + if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-' + || std::string_view(argv[1]).contains(' '))) + { main_func = mainEntryClickHouseLocal; + } int exit_code = main_func(static_cast(argv.size()), argv.data()); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index b2bf942af4e..688ae1a1143 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -674,8 +674,7 @@ private: if (pos + length > end) length = end - pos; - if (length > sizeof(CodePoint)) - length = sizeof(CodePoint); + length = std::min(length, sizeof(CodePoint)); CodePoint res = 0; memcpy(&res, pos, length); @@ -883,9 +882,7 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in markov model"); size_t offset_from_begin_of_string = pos - data; - size_t determinator_sliding_window_size = params.determinator_sliding_window_size; - if (determinator_sliding_window_size > determinator_size) - determinator_sliding_window_size = determinator_size; + size_t determinator_sliding_window_size = std::min(params.determinator_sliding_window_size, determinator_size); size_t determinator_sliding_window_overflow = offset_from_begin_of_string + determinator_sliding_window_size > determinator_size ? 
offset_from_begin_of_string + determinator_sliding_window_size - determinator_size : 0; diff --git a/programs/odbc-bridge/ODBCSource.cpp b/programs/odbc-bridge/ODBCSource.cpp index 7f0d47f7e2e..940970f36ab 100644 --- a/programs/odbc-bridge/ODBCSource.cpp +++ b/programs/odbc-bridge/ODBCSource.cpp @@ -119,8 +119,7 @@ void ODBCSource::insertValue( time_t time = 0; const DataTypeDateTime & datetime_type = assert_cast(*data_type); readDateTimeText(time, in, datetime_type.getTimeZone()); - if (time < 0) - time = 0; + time = std::max(time, 0); column.insert(static_cast(time)); break; } diff --git a/programs/odbc-bridge/getIdentifierQuote.cpp b/programs/odbc-bridge/getIdentifierQuote.cpp index 15bd055e615..c0c833e5b8c 100644 --- a/programs/odbc-bridge/getIdentifierQuote.cpp +++ b/programs/odbc-bridge/getIdentifierQuote.cpp @@ -37,7 +37,7 @@ std::string getIdentifierQuote(nanodbc::ConnectionHolderPtr connection_holder) IdentifierQuotingStyle getQuotingStyle(nanodbc::ConnectionHolderPtr connection) { auto identifier_quote = getIdentifierQuote(connection); - if (identifier_quote.length() == 0) + if (identifier_quote.empty()) return IdentifierQuotingStyle::None; else if (identifier_quote[0] == '`') return IdentifierQuotingStyle::Backticks; diff --git a/programs/odbc-bridge/validateODBCConnectionString.cpp b/programs/odbc-bridge/validateODBCConnectionString.cpp index 6c6e11162b4..72c3c9bddca 100644 --- a/programs/odbc-bridge/validateODBCConnectionString.cpp +++ b/programs/odbc-bridge/validateODBCConnectionString.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "validateODBCConnectionString.h" diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 81440b03690..be696ff2afe 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -13,9 +13,6 @@ set (CLICKHOUSE_SERVER_LINK clickhouse_parsers clickhouse_storages_system clickhouse_table_functions - string_utils - - ${LINK_RESOURCE_LIB} PUBLIC daemon diff --git a/programs/server/MetricsTransmitter.h b/programs/server/MetricsTransmitter.h index 23420117b56..24069a60071 100644 --- a/programs/server/MetricsTransmitter.h +++ b/programs/server/MetricsTransmitter.h @@ -56,10 +56,10 @@ private: std::condition_variable cond; std::optional thread; - static inline constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; - static inline constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; - static inline constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; - static inline constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; + static constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; + static constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; + static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; + static constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; }; } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 326f632d61d..8fcb9d87a93 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -792,9 +792,32 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); + /// This object will periodically calculate some metrics. 
+ ServerAsynchronousMetrics async_metrics( + global_context, + server_settings.asynchronous_metrics_update_period_s, + server_settings.asynchronous_heavy_metrics_update_period_s, + [&]() -> std::vector + { + std::vector metrics; + + std::lock_guard lock(servers_lock); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); + + for (const auto & server : servers_to_start_before_tables) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + + for (const auto & server : servers) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + return metrics; + } + ); + /// NOTE: global context should be destroyed *before* GlobalThreadPool::shutdown() /// Otherwise GlobalThreadPool::shutdown() will hang, since Context holds some threads. SCOPE_EXIT({ + async_metrics.stop(); + /** Ask to cancel background jobs all table engines, * and also query_log. * It is important to do early, not in destructor of Context, because @@ -885,6 +908,16 @@ try server_settings.max_active_parts_loading_thread_pool_size ); + getUnexpectedPartsLoadingThreadPool().initialize( + server_settings.max_unexpected_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_unexpected_parts_loading_thread_pool_size); + + /// It could grow if we need to synchronously wait until all the data parts will be loaded. + getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads( + server_settings.max_active_parts_loading_thread_pool_size + ); + getPartsCleaningThreadPool().initialize( server_settings.max_parts_cleaning_thread_pool_size, 0, // We don't need any threads one all the parts will be deleted @@ -911,27 +944,6 @@ try } } - /// This object will periodically calculate some metrics. 
- ServerAsynchronousMetrics async_metrics( - global_context, - server_settings.asynchronous_metrics_update_period_s, - server_settings.asynchronous_heavy_metrics_update_period_s, - [&]() -> std::vector - { - std::vector metrics; - - std::lock_guard lock(servers_lock); - metrics.reserve(servers_to_start_before_tables.size() + servers.size()); - - for (const auto & server : servers_to_start_before_tables) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - - for (const auto & server : servers) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - return metrics; - } - ); - zkutil::validateZooKeeperConfig(config()); bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); @@ -1205,11 +1217,11 @@ try } { - fs::create_directories(path / "data/"); - fs::create_directories(path / "metadata/"); + fs::create_directories(path / "data"); + fs::create_directories(path / "metadata"); /// Directory with metadata of tables, which was marked as dropped by Atomic database - fs::create_directories(path / "metadata_dropped/"); + fs::create_directories(path / "metadata_dropped"); } if (config().has("interserver_http_port") && config().has("interserver_https_port")) @@ -1466,6 +1478,8 @@ try global_context->setMaxTableSizeToDrop(new_server_settings.max_table_size_to_drop); global_context->setMaxPartitionSizeToDrop(new_server_settings.max_partition_size_to_drop); global_context->setMaxTableNumToWarn(new_server_settings.max_table_num_to_warn); + global_context->setMaxViewNumToWarn(new_server_settings.max_view_num_to_warn); + global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn); global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); @@ -1736,6 +1750,11 @@ try } + if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) + { + PlacementInfo::PlacementInfo::instance().initialize(config()); + } + { std::lock_guard lock(servers_lock); /// We should start interserver communications before (and more important shutdown after) tables. @@ -2084,11 +2103,6 @@ try load_metadata_tasks); } - if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) - { - PlacementInfo::PlacementInfo::instance().initialize(config()); - } - /// Do not keep tasks in server, they should be kept inside databases. Used here to make dependent tasks only. load_metadata_tasks.clear(); load_metadata_tasks.shrink_to_fit(); diff --git a/programs/server/config.xml b/programs/server/config.xml index 27ed5952fc9..4b3248d9d1c 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -715,7 +715,7 @@ + By default this setting is true. --> true depending on how many elements input_rows_count overflows, forward timestamp by at least 1 tick + end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1); + else + end.timestamp = begin.timestamp; + + end.machine_id = begin.machine_id; + end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask; + + return {begin, end}; +} + +struct Data +{ + /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. 
+ static inline std::atomic lowest_available_snowflake_id = 0; + + SnowflakeId reserveRange(size_t input_rows_count) + { + uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); + SnowflakeIdRange range; + do + { + range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); + } + while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); + /// CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try + /// else --> our thread reserved ID range [begin, end) and return the beginning of the range + + return range.begin; + } +}; + +} + +class FunctionGenerateSnowflakeID : public IFunction +{ +public: + static constexpr auto name = "generateSnowflakeID"; + + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors mandatory_args; + FunctionArgumentDescriptors optional_args{ + {"expr", nullptr, nullptr, "Arbitrary expression"} + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_to = col_res->getData(); + + if (input_rows_count != 0) + { + vec_to.resize(input_rows_count); + + Data data; + SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range + + for (UInt64 & to_row : vec_to) + { + to_row = fromSnowflakeId(snowflake_id); + if (snowflake_id.machine_seq_num == max_machine_seq_num) + { + /// handle overflow + snowflake_id.machine_seq_num = 0; + ++snowflake_id.timestamp; + } + else + { + ++snowflake_id.machine_seq_num; + } + } + } + + return col_res; + } + +}; + +REGISTER_FUNCTION(GenerateSnowflakeID) +{ + FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression])"; + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; + FunctionDocumentation::Examples examples = {{"single", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"multiple", "SELECT generateSnowflakeID(1), generateSnowflakeID(2)", ""}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); +} + +} diff --git a/src/Functions/generateULID.cpp b/src/Functions/generateULID.cpp index 9c5c9403185..f2f2d8ae3b9 100644 --- a/src/Functions/generateULID.cpp +++ b/src/Functions/generateULID.cpp @@ -17,7 +17,7 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } class FunctionGenerateULID : public IFunction @@ -45,7 +45,7 @@ public: { if (arguments.size() > 1) throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", getName(), arguments.size()); diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index 411a3a076ac..b226c0840f4 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -73,20 +73,6 @@ void setVariant(UUID & uuid) UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & rand_b_bits_mask) | variant_2_mask; } -struct FillAllRandomPolicy -{ - static constexpr auto name = "generateUUIDv7NonMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; - struct Data - { - void generate(UUID & uuid, uint64_t ts) - { - setTimestampAndVersion(uuid, ts); - setVariant(uuid); - } - }; -}; - struct CounterFields { uint64_t last_timestamp = 0; @@ -133,44 +119,21 @@ struct CounterFields }; -struct GlobalCounterPolicy +struct Data { - static constexpr auto name = "generateUUIDv7"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; - /// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously. 
- struct Data + static inline CounterFields fields; + static inline SharedMutex mutex; /// works a little bit faster than std::mutex here + std::lock_guard guard; + + Data() + : guard(mutex) + {} + + void generate(UUID & uuid, uint64_t timestamp) { - static inline CounterFields fields; - static inline SharedMutex mutex; /// works a little bit faster than std::mutex here - std::lock_guard guard; - - Data() - : guard(mutex) - {} - - void generate(UUID & uuid, uint64_t timestamp) - { - fields.generate(uuid, timestamp); - } - }; -}; - -struct ThreadLocalCounterPolicy -{ - static constexpr auto name = "generateUUIDv7ThreadMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; - - /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. - struct Data - { - static inline thread_local CounterFields fields; - - void generate(UUID & uuid, uint64_t timestamp) - { - fields.generate(uuid, timestamp); - } - }; + fields.generate(uuid, timestamp); + } }; } @@ -181,12 +144,12 @@ DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) DECLARE_SEVERAL_IMPLEMENTATIONS( -template -class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy +class FunctionGenerateUUIDv7Base : public IFunction { public: - String getName() const final { return FillPolicy::name; } + static constexpr auto name = "generateUUIDv7"; + String getName() const final { return name; } size_t getNumberOfArguments() const final { return 0; } bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const final { return false; } @@ -198,7 +161,7 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -222,7 +185,7 @@ public: uint64_t timestamp = getTimestampMillisecond(); for (UUID & uuid : vec_to) { - typename FillPolicy::Data data; + Data data; data.generate(uuid, timestamp); } } @@ -232,19 +195,18 @@ public: ) // DECLARE_SEVERAL_IMPLEMENTATIONS #undef DECLARE_SEVERAL_IMPLEMENTATIONS -template -class FunctionGenerateUUIDv7Base : public TargetSpecific::Default::FunctionGenerateUUIDv7Base +class FunctionGenerateUUIDv7Base : public TargetSpecific::Default::FunctionGenerateUUIDv7Base { public: - using Self = FunctionGenerateUUIDv7Base; - using Parent = TargetSpecific::Default::FunctionGenerateUUIDv7Base; + using Self = FunctionGenerateUUIDv7Base; + using Parent = TargetSpecific::Default::FunctionGenerateUUIDv7Base; explicit FunctionGenerateUUIDv7Base(ContextPtr context) : selector(context) { 
selector.registerImplementation(); #if USE_MULTITARGET_CODE - using ParentAVX2 = TargetSpecific::AVX2::FunctionGenerateUUIDv7Base; + using ParentAVX2 = TargetSpecific::AVX2::FunctionGenerateUUIDv7Base; selector.registerImplementation(); #endif } @@ -263,27 +225,16 @@ private: ImplementationSelector selector; }; -template -void registerUUIDv7Generator(auto& factory) -{ - static constexpr auto doc_syntax_format = "{}([expression])"; - static constexpr auto example_format = "SELECT {}()"; - static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; - - FunctionDocumentation::Description doc_description = FillPolicy::doc_description; - FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); - FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; - FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7."; - FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; - FunctionDocumentation::Categories doc_categories = {"UUID"}; - - factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); -} - REGISTER_FUNCTION(GenerateUUIDv7) { - registerUUIDv7Generator(factory); - registerUUIDv7Generator(factory); - registerUUIDv7Generator(factory); + FunctionDocumentation::Description description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + FunctionDocumentation::Syntax syntax = "SELECT generateUUIDv7()"; + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
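A quick way to see the effect of the dummy argument (a usage sketch, not an example from this file):

SELECT
    generateUUIDv7()  = generateUUIDv7(),  -- expected 1: identical calls are collapsed by common subexpression elimination
    generateUUIDv7(1) = generateUUIDv7(2)  -- expected 0: different dummy expressions force two independent UUIDs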
Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UUID version 7."; + FunctionDocumentation::Examples examples = {{"single", "SELECT generateUUIDv7()", ""}, {"multiple", "SELECT generateUUIDv7(1), generateUUIDv7(2)", ""}}; + FunctionDocumentation::Categories categories = {"UUID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); } + } diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 01184f74b13..1bd71f19f76 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -41,7 +41,7 @@ namespace ErrorCodes namespace { -enum class Method +enum class Method : uint8_t { SPHERE_DEGREES, SPHERE_METERS, @@ -94,13 +94,13 @@ struct Impl } } - static inline NO_SANITIZE_UNDEFINED size_t toIndex(T x) + static NO_SANITIZE_UNDEFINED size_t toIndex(T x) { /// Implementation specific behaviour on overflow or infinite value. return static_cast(x); } - static inline T degDiff(T f) + static T degDiff(T f) { f = std::abs(f); if (f > 180) @@ -108,7 +108,7 @@ struct Impl return f; } - inline T fastCos(T x) + T fastCos(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -117,7 +117,7 @@ struct Impl return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; } - inline T fastSin(T x) + T fastSin(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -128,7 +128,7 @@ struct Impl /// fast implementation of asin(sqrt(x)) /// max error in floats 0.00369%, in doubles 0.00072% - inline T fastAsinSqrt(T x) + T fastAsinSqrt(T x) { if (x < T(0.122)) { diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 93fd7e24853..87a48c887b4 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -15,7 +15,7 @@ struct GreatestBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) > static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct GreatestBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct GreatestSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::greaterOp(a, b) ? 
static_cast(a) : static_cast(b); diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index 830c509f1f5..ddf58c50e8d 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -47,6 +47,10 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + /// Change it to never return LowCardinality, making it consistent when using groupingForRollup / groupingForforCube + /// with __grouping_set + bool canBeExecutedOnLowCardinalityDictionary() const override { return false; } + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); @@ -55,7 +59,7 @@ public: template ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count, AggregationKeyChecker checker) const { - const auto * grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + const auto & grouping_set_column = checkAndGetColumn(*arguments[0].column); auto result = ColumnUInt64::create(); auto & result_data = result->getData(); @@ -64,7 +68,7 @@ public: const auto * result_table = likely(force_compatibility) ? COMPATIBLE_MODE : INCOMPATIBLE_MODE; for (size_t i = 0; i < input_rows_count; ++i) { - UInt64 set_index = grouping_set_column->getElement(i); + UInt64 set_index = grouping_set_column.getElement(i); UInt64 value = 0; for (auto index : arguments_indexes) diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp index 4e41cdbfef6..9e253e87104 100644 --- a/src/Functions/h3GetUnidirectionalEdge.cpp +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -108,7 +108,7 @@ public: /// suppress asan errors generated by the following: /// 'NEW_ADJUSTMENT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:142:24 /// 'NEW_DIGIT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:121:24 - __attribute__((no_sanitize_address)) static inline UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) + __attribute__((no_sanitize_address)) static UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) { const UInt64 res = cellsToDirectedEdge(origin, dest); return res; diff --git a/src/Functions/h3PointDist.cpp b/src/Functions/h3PointDist.cpp index 00b8fb0089e..889675a2dda 100644 --- a/src/Functions/h3PointDist.cpp +++ b/src/Functions/h3PointDist.cpp @@ -49,7 +49,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. 
Must be Float64", - arg->getName(), i, getName()); + arg->getName(), i + 1, getName()); } return std::make_shared(); } diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 48783a672e2..8ea16f688ee 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -88,8 +88,8 @@ ColumnPtr FunctionHasColumnInTable::executeImpl(const ColumnsWithTypeAndName & a { auto get_string_from_columns = [&](const ColumnWithTypeAndName & column) -> String { - const ColumnConst * const_column = checkAndGetColumnConst(column.column.get()); - return const_column->getValue(); + const ColumnConst & const_column = checkAndGetColumnConst(*column.column); + return const_column.getValue(); }; size_t arg = 0; diff --git a/src/Functions/hilbertDecode.cpp b/src/Functions/hilbertDecode.cpp new file mode 100644 index 00000000000..df7f98f56ac --- /dev/null +++ b/src/Functions/hilbertDecode.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include "hilbertDecode2DLUT.h" +#include + + +namespace DB +{ + +class FunctionHilbertDecode : public FunctionSpaceFillingCurveDecode<2, 0, 32> +{ +public: + static constexpr auto name = "hilbertDecode"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override { return name; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + size_t num_dimensions; + const auto * col_const = typeid_cast(arguments[0].column.get()); + const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); + if (mask) + num_dimensions = mask->tupleSize(); + else + num_dimensions = col_const->getUInt(0); + const ColumnPtr & col_code = arguments[1].column; + Columns tuple_columns(num_dimensions); + + const auto shrink = [mask](const UInt64 value, const UInt8 column_num) + { + if (mask) + return value >> mask->getColumn(column_num).getUInt(0); + return value; + }; + + auto col0 = ColumnUInt64::create(); + auto & vec0 = col0->getData(); + vec0.resize(input_rows_count); + + if (num_dimensions == 1) + { + for (size_t i = 0; i < input_rows_count; i++) + { + vec0[i] = shrink(col_code->getUInt(i), 0); + } + tuple_columns[0] = std::move(col0); + return ColumnTuple::create(tuple_columns); + } + + auto col1 = ColumnUInt64::create(); + auto & vec1 = col1->getData(); + vec1.resize(input_rows_count); + + if (num_dimensions == 2) + { + for (size_t i = 0; i < input_rows_count; i++) + { + const auto res = FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(col_code->getUInt(i)); + vec0[i] = shrink(std::get<0>(res), 0); + vec1[i] = shrink(std::get<1>(res), 1); + } + tuple_columns[0] = std::move(col0); + tuple_columns[1] = std::move(col1); + return ColumnTuple::create(tuple_columns); + } + + return ColumnTuple::create(tuple_columns); + } +}; + + +REGISTER_FUNCTION(HilbertDecode) +{ + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Decodes a Hilbert curve index back into a tuple of unsigned integers, representing coordinates in multi-dimensional space. + +The function has two modes of operation: +- Simple +- Expanded + +Simple Mode: Accepts the desired tuple size as the first argument (up to 2) and the Hilbert index as the second argument. This mode decodes the index into a tuple of the specified size. 
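As a sanity check (a usage sketch, not taken from the source), decoding is expected to invert hilbertEncode for in-range two-dimensional inputs:

SELECT hilbertDecode(2, hilbertEncode(3, 4))  -- expected to return the original tuple (3, 4)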
+[example:simple] +Will decode into: `(8, 0)` +The resulting tuple size cannot be more than 2 + +Expanded Mode: Takes a range mask (tuple) as the first argument and the Hilbert index as the second argument. +Each number in the mask specifies the number of bits by which the corresponding decoded argument will be right-shifted, effectively scaling down the output values. +[example:range_shrank] +Note: see hilbertEncode() docs on why range change might be beneficial. +Still limited to 2 numbers at most. + +Hilbert code for one argument is always the argument itself (as a tuple). +[example:identity] +Produces: `(1)` + +A single argument with a tuple specifying bit shifts will be right-shifted accordingly. +[example:identity_shrank] +Produces: `(128)` + +The function accepts a column of codes as a second argument: +[example:from_table] + +The range tuple must be a constant: +[example:from_table_range] +)", + .examples{ + {"simple", "SELECT hilbertDecode(2, 64)", ""}, + {"range_shrank", "SELECT hilbertDecode((1,2), 1572864)", ""}, + {"identity", "SELECT hilbertDecode(1, 1)", ""}, + {"identity_shrank", "SELECT hilbertDecode(tuple(2), 512)", ""}, + {"from_table", "SELECT hilbertDecode(2, code) FROM table", ""}, + {"from_table_range", "SELECT hilbertDecode((1,2), code) FROM table", ""}, + }, + .categories {"Hilbert coding", "Hilbert Curve"} + }); +} + +} diff --git a/src/Functions/hilbertDecode2DLUT.h b/src/Functions/hilbertDecode2DLUT.h new file mode 100644 index 00000000000..804ba4eb23f --- /dev/null +++ b/src/Functions/hilbertDecode2DLUT.h @@ -0,0 +1,145 @@ +#pragma once +#include + + +namespace DB +{ + +namespace HilbertDetails +{ + +template +class HilbertDecodeLookupTable +{ +public: + constexpr static UInt8 LOOKUP_TABLE[0] = {}; +}; + +template <> +class HilbertDecodeLookupTable<1> +{ +public: + constexpr static UInt8 LOOKUP_TABLE[16] = { + 4, 1, 3, 10, + 0, 6, 7, 13, + 15, 9, 8, 2, + 11, 14, 12, 5 + }; +}; + +template <> +class HilbertDecodeLookupTable<2> +{ +public: + constexpr static UInt8 LOOKUP_TABLE[64] = { + 0, 20, 21, 49, 18, 3, 7, 38, + 26, 11, 15, 46, 61, 41, 40, 12, + 16, 1, 5, 36, 8, 28, 29, 57, + 10, 30, 31, 59, 39, 54, 50, 19, + 47, 62, 58, 27, 55, 35, 34, 6, + 53, 33, 32, 4, 24, 9, 13, 44, + 63, 43, 42, 14, 45, 60, 56, 25, + 37, 52, 48, 17, 2, 22, 23, 51 + }; +}; + +template <> +class HilbertDecodeLookupTable<3> +{ +public: + constexpr static UInt8 LOOKUP_TABLE[256] = { + 64, 1, 9, 136, 16, 88, 89, 209, 18, 90, 91, 211, 139, 202, 194, 67, + 4, 76, 77, 197, 70, 7, 15, 142, 86, 23, 31, 158, 221, 149, 148, 28, + 36, 108, 109, 229, 102, 39, 47, 174, 118, 55, 63, 190, 253, 181, 180, 60, + 187, 250, 242, 115, 235, 163, 162, 42, 233, 161, 160, 40, 112, 49, 57, 184, + 0, 72, 73, 193, 66, 3, 11, 138, 82, 19, 27, 154, 217, 145, 144, 24, + 96, 33, 41, 168, 48, 120, 121, 241, 50, 122, 123, 243, 171, 234, 226, 99, + 100, 37, 45, 172, 52, 124, 125, 245, 54, 126, 127, 247, 175, 238, 230, 103, + 223, 151, 150, 30, 157, 220, 212, 85, 141, 204, 196, 69, 6, 78, 79, 199, + 255, 183, 182, 62, 189, 252, 244, 117, 173, 236, 228, 101, 38, 110, 111, 231, + 159, 222, 214, 87, 207, 135, 134, 14, 205, 133, 132, 12, 84, 21, 29, 156, + 155, 218, 210, 83, 203, 131, 130, 10, 201, 129, 128, 8, 80, 17, 25, 152, + 32, 104, 105, 225, 98, 35, 43, 170, 114, 51, 59, 186, 249, 177, 176, 56, + 191, 254, 246, 119, 239, 167, 166, 46, 237, 165, 164, 44, 116, 53, 61, 188, + 251, 179, 178, 58, 185, 248, 240, 113, 169, 232, 224, 97, 34, 106, 107, 227, + 219, 147, 146, 26, 153, 216, 208, 81, 137, 200, 192, 65, 2, 74, 75, 
195, + 68, 5, 13, 140, 20, 92, 93, 213, 22, 94, 95, 215, 143, 206, 198, 71 + }; +}; + +} + +template +class FunctionHilbertDecode2DWIthLookupTableImpl +{ + static_assert(bit_step <= 3, "bit_step should not be more than 3 to fit in UInt8"); +public: + static std::tuple decode(UInt64 hilbert_code) + { + UInt64 x = 0; + UInt64 y = 0; + const auto leading_zeros_count = getLeadingZeroBits(hilbert_code); + const auto used_bits = std::numeric_limits::digits - leading_zeros_count; + + auto [current_shift, state] = getInitialShiftAndState(used_bits); + + while (current_shift >= 0) + { + const UInt8 hilbert_bits = (hilbert_code >> current_shift) & HILBERT_MASK; + const auto [x_bits, y_bits] = getCodeAndUpdateState(hilbert_bits, state); + x |= (x_bits << (current_shift >> 1)); + y |= (y_bits << (current_shift >> 1)); + current_shift -= getHilbertShift(bit_step); + } + + return {x, y}; + } + +private: + // for bit_step = 3 + // LOOKUP_TABLE[SSHHHHHH] = SSXXXYYY + // where SS - 2 bits for state, XXX - 3 bits of x, YYY - 3 bits of y + // State is rotation of curve on every step, left/up/right/down - therefore 2 bits + static std::pair getCodeAndUpdateState(UInt8 hilbert_bits, UInt8& state) + { + const UInt8 table_index = state | hilbert_bits; + const auto table_code = HilbertDetails::HilbertDecodeLookupTable::LOOKUP_TABLE[table_index]; + state = table_code & STATE_MASK; + const UInt64 x_bits = (table_code & X_MASK) >> bit_step; + const UInt64 y_bits = table_code & Y_MASK; + return {x_bits, y_bits}; + } + + // hilbert code is double size of input values + static constexpr UInt8 getHilbertShift(UInt8 shift) + { + return shift << 1; + } + + static std::pair getInitialShiftAndState(UInt8 used_bits) + { + UInt8 iterations = used_bits / HILBERT_SHIFT; + Int8 initial_shift = iterations * HILBERT_SHIFT; + if (initial_shift < used_bits) + { + ++iterations; + } + else + { + initial_shift -= HILBERT_SHIFT; + } + UInt8 state = iterations % 2 == 0 ? LEFT_STATE : DEFAULT_STATE; + return {initial_shift, state}; + } + + constexpr static UInt8 STEP_MASK = (1 << bit_step) - 1; + constexpr static UInt8 HILBERT_SHIFT = getHilbertShift(bit_step); + constexpr static UInt8 HILBERT_MASK = (1 << HILBERT_SHIFT) - 1; + constexpr static UInt8 STATE_MASK = 0b11 << HILBERT_SHIFT; + constexpr static UInt8 Y_MASK = STEP_MASK; + constexpr static UInt8 X_MASK = STEP_MASK << bit_step; + constexpr static UInt8 LEFT_STATE = 0b01 << HILBERT_SHIFT; + constexpr static UInt8 DEFAULT_STATE = bit_step % 2 == 0 ? 
LEFT_STATE : 0; +}; + +} diff --git a/src/Functions/hilbertEncode.cpp b/src/Functions/hilbertEncode.cpp new file mode 100644 index 00000000000..13512d0d36c --- /dev/null +++ b/src/Functions/hilbertEncode.cpp @@ -0,0 +1,150 @@ +#include "hilbertEncode2DLUT.h" +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; +} + + +class FunctionHilbertEncode : public FunctionSpaceFillingCurveEncode +{ +public: + static constexpr auto name = "hilbertEncode"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override { return name; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + if (input_rows_count == 0) + return ColumnUInt64::create(); + + size_t num_dimensions = arguments.size(); + size_t vector_start_index = 0; + const auto * const_col = typeid_cast(arguments[0].column.get()); + const ColumnTuple * mask; + if (const_col) + mask = typeid_cast(const_col->getDataColumnPtr().get()); + else + mask = typeid_cast(arguments[0].column.get()); + if (mask) + { + num_dimensions = mask->tupleSize(); + vector_start_index = 1; + for (size_t i = 0; i < num_dimensions; i++) + { + auto ratio = mask->getColumn(i).getUInt(0); + if (ratio > 32) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal argument {} of function {}, should be a number in range 0-32", + arguments[0].column->getName(), getName()); + } + } + + auto col_res = ColumnUInt64::create(); + ColumnUInt64::Container & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); + + const auto expand = [mask](const UInt64 value, const UInt8 column_num) + { + if (mask) + return value << mask->getColumn(column_num).getUInt(0); + return value; + }; + + const ColumnPtr & col0 = arguments[0 + vector_start_index].column; + if (num_dimensions == 1) + { + for (size_t i = 0; i < input_rows_count; ++i) + { + vec_res[i] = expand(col0->getUInt(i), 0); + } + return col_res; + } + + const ColumnPtr & col1 = arguments[1 + vector_start_index].column; + if (num_dimensions == 2) + { + for (size_t i = 0; i < input_rows_count; ++i) + { + vec_res[i] = FunctionHilbertEncode2DWIthLookupTableImpl<3>::encode( + expand(col0->getUInt(i), 0), + expand(col1->getUInt(i), 1)); + } + return col_res; + } + + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal number of UInt arguments of function {}: should be not more than 2 dimensions", + getName()); + } +}; + + +REGISTER_FUNCTION(HilbertEncode) +{ + factory.registerFunction(FunctionDocumentation{ + .description=R"( +Calculates code for Hilbert Curve for a list of unsigned integers. + +The function has two modes of operation: +- Simple +- Expanded + +Simple: accepts up to 2 unsigned integers as arguments and produces a UInt64 code. +[example:simple] +Produces: `31` + +Expanded: accepts a range mask (tuple) as a first argument and up to 2 unsigned integers as other arguments. +Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range. 
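For instance (an illustrative sketch with hypothetical column and table names), an 8-bit country code can be stretched towards the same range as a 32-bit IPv4 address by shifting it left by 24 bits:

SELECT hilbertEncode((24, 0), country_code, ipv4) FROM visits  -- the 8-bit country_code is shifted into the same 32-bit range as the IPv4 dimension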
+[example:range_expanded] +Produces: `4031541586602` +Note: tuple size must be equal to the number of the other arguments + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF) + +For a single argument without a tuple, the function returns the argument itself as the Hilbert index, since no dimensional mapping is needed. +[example:identity] +Produces: `1` + +If a single argument is provided with a tuple specifying bit shifts, the function shifts the argument left by the specified number of bits. +[example:identity_expanded] +Produces: `512` + +The function also accepts columns as arguments: +[example:from_table] + +But the range tuple must still be a constant: +[example:from_table_range] + +Please note that you can fit only so much bits of information into Hilbert code as UInt64 has. +Two arguments will have a range of maximum 2^32 (64/2) each +All overflow will be clamped to zero +)", + .examples{ + {"simple", "SELECT hilbertEncode(3, 4)", ""}, + {"range_expanded", "SELECT hilbertEncode((10,6), 1024, 16)", ""}, + {"identity", "SELECT hilbertEncode(1)", ""}, + {"identity_expanded", "SELECT hilbertEncode(tuple(2), 128)", ""}, + {"from_table", "SELECT hilbertEncode(n1, n2) FROM table", ""}, + {"from_table_range", "SELECT hilbertEncode((1,2), n1, n2) FROM table", ""}, + }, + .categories {"Hilbert coding", "Hilbert Curve"} + }); +} + +} diff --git a/src/Functions/hilbertEncode2DLUT.h b/src/Functions/hilbertEncode2DLUT.h new file mode 100644 index 00000000000..413d976a762 --- /dev/null +++ b/src/Functions/hilbertEncode2DLUT.h @@ -0,0 +1,142 @@ +#pragma once +#include + + +namespace DB +{ + +namespace HilbertDetails +{ + +template +class HilbertEncodeLookupTable +{ +public: + constexpr static UInt8 LOOKUP_TABLE[0] = {}; +}; + +template <> +class HilbertEncodeLookupTable<1> +{ +public: + constexpr static UInt8 LOOKUP_TABLE[16] = { + 4, 1, 11, 2, + 0, 15, 5, 6, + 10, 9, 3, 12, + 14, 7, 13, 8 + }; +}; + +template <> +class HilbertEncodeLookupTable<2> +{ +public: + constexpr static UInt8 LOOKUP_TABLE[64] = { + 0, 51, 20, 5, 17, 18, 39, 6, + 46, 45, 24, 9, 15, 60, 43, 10, + 16, 1, 62, 31, 35, 2, 61, 44, + 4, 55, 8, 59, 21, 22, 25, 26, + 42, 41, 38, 37, 11, 56, 7, 52, + 28, 13, 50, 19, 47, 14, 49, 32, + 58, 27, 12, 63, 57, 40, 29, 30, + 54, 23, 34, 33, 53, 36, 3, 48 + }; +}; + + +template <> +class HilbertEncodeLookupTable<3> +{ +public: + constexpr static UInt8 LOOKUP_TABLE[256] = { + 64, 1, 206, 79, 16, 211, 84, 21, 131, 2, 205, 140, 81, 82, 151, 22, 4, + 199, 8, 203, 158, 157, 88, 25, 69, 70, 73, 74, 31, 220, 155, 26, 186, + 185, 182, 181, 32, 227, 100, 37, 59, 248, 55, 244, 97, 98, 167, 38, 124, + 61, 242, 115, 174, 173, 104, 41, 191, 62, 241, 176, 47, 236, 171, 42, 0, + 195, 68, 5, 250, 123, 60, 255, 65, 66, 135, 6, 249, 184, 125, 126, 142, + 141, 72, 9, 246, 119, 178, 177, 15, 204, 139, 10, 245, 180, 51, 240, 80, + 17, 222, 95, 96, 33, 238, 111, 147, 18, 221, 156, 163, 34, 237, 172, 20, + 215, 24, 219, 36, 231, 40, 235, 85, 86, 89, 90, 101, 102, 105, 106, 170, + 169, 166, 165, 154, 153, 150, 149, 43, 232, 39, 228, 27, 216, 23, 212, 108, + 45, 226, 99, 92, 29, 210, 83, 175, 46, 225, 160, 159, 30, 209, 144, 48, + 243, 116, 53, 202, 75, 12, 207, 113, 114, 183, 54, 201, 136, 77, 78, 190, + 189, 120, 57, 198, 71, 130, 129, 63, 252, 187, 58, 197, 132, 3, 192, 234, + 107, 44, 239, 112, 49, 254, 127, 233, 168, 109, 110, 179, 50, 253, 188, 230, + 103, 162, 
161, 52, 247, 56, 251, 229, 164, 35, 224, 117, 118, 121, 122, 218, + 91, 28, 223, 138, 137, 134, 133, 217, 152, 93, 94, 11, 200, 7, 196, 214, + 87, 146, 145, 76, 13, 194, 67, 213, 148, 19, 208, 143, 14, 193, 128, + }; +}; + +} + +template +class FunctionHilbertEncode2DWIthLookupTableImpl +{ + static_assert(bit_step <= 3, "bit_step should not be more than 3 to fit in UInt8"); +public: + static UInt64 encode(UInt64 x, UInt64 y) + { + UInt64 hilbert_code = 0; + const auto leading_zeros_count = getLeadingZeroBits(x | y); + const auto used_bits = std::numeric_limits::digits - leading_zeros_count; + if (used_bits > 32) + return 0; // hilbert code will be overflowed in this case + + auto [current_shift, state] = getInitialShiftAndState(used_bits); + while (current_shift >= 0) + { + const UInt8 x_bits = (x >> current_shift) & STEP_MASK; + const UInt8 y_bits = (y >> current_shift) & STEP_MASK; + const auto hilbert_bits = getCodeAndUpdateState(x_bits, y_bits, state); + hilbert_code |= (hilbert_bits << getHilbertShift(current_shift)); + current_shift -= bit_step; + } + + return hilbert_code; + } + +private: + // for bit_step = 3 + // LOOKUP_TABLE[SSXXXYYY] = SSHHHHHH + // where SS - 2 bits for state, XXX - 3 bits of x, YYY - 3 bits of y + // State is rotation of curve on every step, left/up/right/down - therefore 2 bits + static UInt64 getCodeAndUpdateState(UInt8 x_bits, UInt8 y_bits, UInt8& state) + { + const UInt8 table_index = state | (x_bits << bit_step) | y_bits; + const auto table_code = HilbertDetails::HilbertEncodeLookupTable::LOOKUP_TABLE[table_index]; + state = table_code & STATE_MASK; + return table_code & HILBERT_MASK; + } + + // hilbert code is double size of input values + static constexpr UInt8 getHilbertShift(UInt8 shift) + { + return shift << 1; + } + + static std::pair getInitialShiftAndState(UInt8 used_bits) + { + UInt8 iterations = used_bits / bit_step; + Int8 initial_shift = iterations * bit_step; + if (initial_shift < used_bits) + { + ++iterations; + } + else + { + initial_shift -= bit_step; + } + UInt8 state = iterations % 2 == 0 ? LEFT_STATE : DEFAULT_STATE; + return {initial_shift, state}; + } + + constexpr static UInt8 STEP_MASK = (1 << bit_step) - 1; + constexpr static UInt8 HILBERT_SHIFT = getHilbertShift(bit_step); + constexpr static UInt8 HILBERT_MASK = (1 << HILBERT_SHIFT) - 1; + constexpr static UInt8 STATE_MASK = 0b11 << HILBERT_SHIFT; + constexpr static UInt8 LEFT_STATE = 0b01 << HILBERT_SHIFT; + constexpr static UInt8 DEFAULT_STATE = bit_step % 2 == 0 ? 
LEFT_STATE : 0; +}; + +} diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp index c9682b44b2c..5a7ae3485ba 100644 --- a/src/Functions/idna.cpp +++ b/src/Functions/idna.cpp @@ -26,7 +26,7 @@ namespace ErrorCodes /// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode /// and [3] https://www.unicode.org/reports/tr46/#ToUnicode -enum class ErrorHandling +enum class ErrorHandling : uint8_t { Throw, /// Throw exception Empty /// Return empty string diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 4f75042ad8d..4c08cd3b931 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -76,75 +77,17 @@ inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const Arr { size_t size = cond.size(); - bool a_is_short = a.size() < size; - bool b_is_short = b.size() < size; - - if (a_is_short && b_is_short) + for (size_t i = 0; i < size; ++i) { - size_t a_index = 0, b_index = 0; - for (size_t i = 0; i < size; ++i) + if constexpr (is_native_int_or_decimal_v) + res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b[i]); + else if constexpr (std::is_floating_point_v) { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b[b_index]); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b[b_index], res[i]) - } - else - res[i] = cond[i] ? static_cast(a[a_index]) : static_cast(b[b_index]); - - a_index += !!cond[i]; - b_index += !cond[i]; + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b[i], res[i]) } - } - else if (a_is_short) - { - size_t a_index = 0; - for (size_t i = 0; i < size; ++i) + else { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b[i]); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b[i], res[i]) - } - else - res[i] = cond[i] ? static_cast(a[a_index]) : static_cast(b[i]); - - a_index += !!cond[i]; - } - } - else if (b_is_short) - { - size_t b_index = 0; - for (size_t i = 0; i < size; ++i) - { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b[b_index]); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b[b_index], res[i]) - } - else - res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[b_index]); - - b_index += !cond[i]; - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b[i]); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b[i], res[i]) - } - else - { - res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[i]); - } + res[i] = cond[i] ? static_cast(a[i]) : static_cast(b[i]); } } } @@ -153,37 +96,16 @@ template ) + res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b); + else if constexpr (std::is_floating_point_v) { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a[a_index]) + (!cond[i]) * static_cast(b); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b, res[i]) - } - else - res[i] = cond[i] ? 
static_cast(a[a_index]) : static_cast(b); - - a_index += !!cond[i]; - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a[i]) + (!cond[i]) * static_cast(b); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b, res[i]) - } - else - res[i] = cond[i] ? static_cast(a[i]) : static_cast(b); + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b, res[i]) } + else + res[i] = cond[i] ? static_cast(a[i]) : static_cast(b); } } @@ -191,37 +113,16 @@ template ) + res[i] = !!cond[i] * static_cast(a) + (!cond[i]) * static_cast(b[i]); + else if constexpr (std::is_floating_point_v) { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a) + (!cond[i]) * static_cast(b[b_index]); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a, b[b_index], res[i]) - } - else - res[i] = cond[i] ? static_cast(a) : static_cast(b[b_index]); - - b_index += !cond[i]; - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - if constexpr (is_native_int_or_decimal_v) - res[i] = !!cond[i] * static_cast(a) + (!cond[i]) * static_cast(b[i]); - else if constexpr (std::is_floating_point_v) - { - BRANCHFREE_IF_FLOAT(ResultType, cond[i], a, b[i], res[i]) - } - else - res[i] = cond[i] ? static_cast(a) : static_cast(b[i]); + BRANCHFREE_IF_FLOAT(ResultType, cond[i], a, b[i], res[i]) } + else + res[i] = cond[i] ? static_cast(a) : static_cast(b[i]); } } @@ -879,9 +780,6 @@ private: bool then_is_const = isColumnConst(*col_then); bool else_is_const = isColumnConst(*col_else); - bool then_is_short = col_then->size() < cond_col->size(); - bool else_is_short = col_else->size() < cond_col->size(); - const auto & cond_array = cond_col->getData(); if (then_is_const && else_is_const) @@ -901,37 +799,34 @@ private: { const IColumn & then_nested_column = assert_cast(*col_then).getDataColumn(); - size_t else_index = 0; for (size_t i = 0; i < input_rows_count; ++i) { if (cond_array[i]) result_column->insertFrom(then_nested_column, 0); else - result_column->insertFrom(*col_else, else_is_short ? else_index++ : i); + result_column->insertFrom(*col_else, i); } } else if (else_is_const) { const IColumn & else_nested_column = assert_cast(*col_else).getDataColumn(); - size_t then_index = 0; for (size_t i = 0; i < input_rows_count; ++i) { if (cond_array[i]) - result_column->insertFrom(*col_then, then_is_short ? then_index++ : i); + result_column->insertFrom(*col_then, i); else result_column->insertFrom(else_nested_column, 0); } } else { - size_t then_index = 0, else_index = 0; for (size_t i = 0; i < input_rows_count; ++i) { if (cond_array[i]) - result_column->insertFrom(*col_then, then_is_short ? then_index++ : i); + result_column->insertFrom(*col_then, i); else - result_column->insertFrom(*col_else, else_is_short ? 
else_index++ : i); + result_column->insertFrom(*col_else, i); } } @@ -948,12 +843,12 @@ private: bool cond_is_const = false; bool cond_is_true = false; bool cond_is_false = false; - if (const auto * const_arg = checkAndGetColumn(*arg_cond.column)) + if (const auto * const_arg = checkAndGetColumn(&*arg_cond.column)) { cond_is_const = true; not_const_condition = const_arg->getDataColumnPtr(); ColumnPtr data_column = const_arg->getDataColumnPtr(); - if (const auto * const_nullable_arg = checkAndGetColumn(*data_column)) + if (const auto * const_nullable_arg = checkAndGetColumn(&*data_column)) { data_column = const_nullable_arg->getNestedColumnPtr(); if (!data_column->empty()) @@ -962,7 +857,7 @@ private: if (!data_column->empty()) { - cond_is_true = !cond_is_null && checkAndGetColumn(*data_column)->getBool(0); + cond_is_true = !cond_is_null && checkAndGetColumn(*data_column).getBool(0); cond_is_false = !cond_is_null && !cond_is_true; } } @@ -975,12 +870,12 @@ private: else if (cond_is_false || cond_is_null) return castColumn(column2, result_type); - if (const auto * nullable = checkAndGetColumn(*not_const_condition)) + if (const auto * nullable = checkAndGetColumn(&*not_const_condition)) { ColumnPtr new_cond_column = nullable->getNestedColumnPtr(); size_t column_size = arg_cond.column->size(); - if (checkAndGetColumn(*new_cond_column)) + if (checkAndGetColumn(&*new_cond_column)) { auto nested_column_copy = new_cond_column->cloneResized(new_cond_column->size()); typeid_cast(nested_column_copy.get())->applyZeroMap(nullable->getNullMapData()); @@ -1027,12 +922,12 @@ private: /// Const(size = 0, Int32(size = 1)) static ColumnPtr recursiveGetNestedColumnWithoutNullable(const ColumnPtr & column) { - if (const auto * nullable = checkAndGetColumn(*column)) + if (const auto * nullable = checkAndGetColumn(&*column)) { /// Nullable cannot contain Nullable return nullable->getNestedColumnPtr(); } - else if (const auto * column_const = checkAndGetColumn(*column)) + else if (const auto * column_const = checkAndGetColumn(&*column)) { /// Save Constant, but remove Nullable return ColumnConst::create(recursiveGetNestedColumnWithoutNullable(column_const->getDataColumnPtr()), column->size()); @@ -1051,8 +946,8 @@ private: const ColumnWithTypeAndName & arg_then = arguments[1]; const ColumnWithTypeAndName & arg_else = arguments[2]; - const auto * then_is_nullable = checkAndGetColumn(*arg_then.column); - const auto * else_is_nullable = checkAndGetColumn(*arg_else.column); + const auto * then_is_nullable = checkAndGetColumn(&*arg_then.column); + const auto * else_is_nullable = checkAndGetColumn(&*arg_else.column); if (!then_is_nullable && !else_is_nullable) return nullptr; @@ -1124,9 +1019,6 @@ private: if (then_is_null && else_is_null) return result_type->createColumnConstWithDefaultValue(input_rows_count); - bool then_is_short = arg_then.column->size() < arg_cond.column->size(); - bool else_is_short = arg_else.column->size() < arg_cond.column->size(); - const ColumnUInt8 * cond_col = typeid_cast(arg_cond.column.get()); const ColumnConst * cond_const_col = checkAndGetColumnConst>(arg_cond.column.get()); @@ -1145,8 +1037,6 @@ private: { arg_else_column = arg_else_column->convertToFullColumnIfConst(); auto result_column = IColumn::mutate(std::move(arg_else_column)); - if (else_is_short) - result_column->expand(cond_col->getData(), true); if (isColumnNullable(*result_column)) { assert_cast(*result_column).applyNullMap(assert_cast(*arg_cond.column)); @@ -1157,6 +1047,11 @@ private: 
variant_column->applyNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else return ColumnNullable::create(materializeColumnIfConst(result_column), arg_cond.column); } @@ -1187,8 +1082,6 @@ private: { arg_then_column = arg_then_column->convertToFullColumnIfConst(); auto result_column = IColumn::mutate(std::move(arg_then_column)); - if (then_is_short) - result_column->expand(cond_col->getData(), false); if (isColumnNullable(*result_column)) { @@ -1200,6 +1093,11 @@ private: variant_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else { size_t size = input_rows_count; @@ -1278,16 +1176,16 @@ public: /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments[0]->onlyNull()) - return arguments[2]; + if (!arguments[0]->onlyNull()) + { + if (arguments[0]->isNullable()) + return getReturnTypeImpl({ + removeNullable(arguments[0]), arguments[1], arguments[2]}); - if (arguments[0]->isNullable()) - return getReturnTypeImpl({ - removeNullable(arguments[0]), arguments[1], arguments[2]}); - - if (!WhichDataType(arguments[0]).isUInt8()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument (condition) of function if. " - "Must be UInt8.", arguments[0]->getName()); + if (!WhichDataType(arguments[0]).isUInt8()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument (condition) of function if. 
" + "Must be UInt8.", arguments[0]->getName()); + } if (use_variant_when_no_common_type) return getLeastSupertypeOrVariant(DataTypes{arguments[1], arguments[2]}); diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 5460ee06792..6b2958227bc 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Functions/initialQueryID.cpp b/src/Functions/initialQueryID.cpp index 469f37cf614..9c9390d4e50 100644 --- a/src/Functions/initialQueryID.cpp +++ b/src/Functions/initialQueryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionInitialQueryID(const String & initial_query_id_) : initial_query_id(initial_query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/intDiv.cpp b/src/Functions/intDiv.cpp index 38939556fa5..6b5bb00eacd 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -80,7 +80,7 @@ struct DivideIntegralByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 96ff6ea80fc..f32eac17127 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -13,7 +13,7 @@ struct DivideIntegralOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if (unlikely(divisionLeadsToFPE(a, b))) return 0; diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 6944c4701bc..733f9d55702 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -19,7 +19,7 @@ struct IntExp10Impl using ResultType = UInt64; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v || std::is_same_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "IntExp10 is not implemented for big integers"); diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index 4e5cc60a731..7e016a0dbd2 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -20,7 +20,7 @@ struct IntExp2Impl using ResultType = UInt64; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "intExp2 not implemented for big integers"); @@ -31,7 +31,7 @@ struct IntExp2Impl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, 
bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "IntExp2Impl expected an integral type"); diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index dd5182aeade..ea95a5c2b1c 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -44,9 +45,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ? checkAndGetColumn(*elem.column) : checkAndGetColumn(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.resize(discriminators.size()); @@ -57,17 +59,17 @@ public: if (elem.type->isLowCardinalityNullable()) { - const auto * low_cardinality_column = checkAndGetColumn(*elem.column); - const size_t null_index = low_cardinality_column->getDictionary().getNullValueIndex(); + const auto & low_cardinality_column = checkAndGetColumn(*elem.column); + const size_t null_index = low_cardinality_column.getDictionary().getNullValueIndex(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); - data.resize(low_cardinality_column->size()); - for (size_t i = 0; i != low_cardinality_column->size(); ++i) - data[i] = (low_cardinality_column->getIndexAt(i) != null_index); + data.resize(low_cardinality_column.size()); + for (size_t i = 0; i != low_cardinality_column.size(); ++i) + data[i] = (low_cardinality_column.getIndexAt(i) != null_index); return res; } - if (const auto * nullable = checkAndGetColumn(*elem.column)) + if (const auto * nullable = checkAndGetColumn(&*elem.column)) { /// Return the negated null map. auto res_column = ColumnUInt8::create(input_rows_count); diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index 4bf4e44f866..a98ff2ab8e8 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -46,9 +47,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ? 
checkAndGetColumn(*elem.column) : checkAndGetColumn(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.reserve(discriminators.size()); @@ -59,17 +61,17 @@ public: if (elem.type->isLowCardinalityNullable()) { - const auto * low_cardinality_column = checkAndGetColumn(*elem.column); - size_t null_index = low_cardinality_column->getDictionary().getNullValueIndex(); + const auto & low_cardinality_column = checkAndGetColumn(*elem.column); + size_t null_index = low_cardinality_column.getDictionary().getNullValueIndex(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); - data.reserve(low_cardinality_column->size()); - for (size_t i = 0; i != low_cardinality_column->size(); ++i) - data.push_back(low_cardinality_column->getIndexAt(i) == null_index); + data.reserve(low_cardinality_column.size()); + for (size_t i = 0; i != low_cardinality_column.size(); ++i) + data.push_back(low_cardinality_column.getIndexAt(i) == null_index); return res; } - if (const auto * nullable = checkAndGetColumn(*elem.column)) + if (const auto * nullable = checkAndGetColumn(&*elem.column)) { /// Merely return the embedded null map. return nullable->getNullMapColumnPtr(); diff --git a/src/Functions/isValidUTF8.cpp b/src/Functions/isValidUTF8.cpp index e7aba672356..d5f5e6a8986 100644 --- a/src/Functions/isValidUTF8.cpp +++ b/src/Functions/isValidUTF8.cpp @@ -65,9 +65,9 @@ SOFTWARE. */ #ifndef __SSE4_1__ - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } #else - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { /* * Map high nibble of "First Byte" to legal character length minus 1 diff --git a/src/Functions/jsonMergePatch.cpp b/src/Functions/jsonMergePatch.cpp index 65946721432..a83daacdbf6 100644 --- a/src/Functions/jsonMergePatch.cpp +++ b/src/Functions/jsonMergePatch.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -53,7 +53,7 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} requires at least one argument.", getName()); for (const auto & arg : arguments) if (!isString(arg.type)) diff --git a/src/Functions/jumpConsistentHash.cpp b/src/Functions/jumpConsistentHash.cpp index ffc21eb5cea..fbac5d4fdd5 100644 --- a/src/Functions/jumpConsistentHash.cpp +++ b/src/Functions/jumpConsistentHash.cpp @@ -29,7 +29,7 @@ struct JumpConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return JumpConsistentHash(hash, n); } diff --git a/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp 
index 55a08023cbd..88dc287be16 100644 --- a/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp +++ b/src/Functions/keyvaluepair/tests/gtest_extractKeyValuePairs.cpp @@ -11,7 +11,6 @@ #include #include -#include namespace @@ -41,23 +40,6 @@ std::string PrintMap(const auto & keys, const auto & values) return std::move(buff.str()); } -template -struct Dump -{ - const T & value; - - friend std::ostream & operator<<(std::ostream & ostr, const Dump & d) - { - return dumpValue(ostr, d.value); - } -}; - -template -auto print_with_dump(const T & value) -{ - return Dump{value}; -} - } struct KeyValuePairExtractorTestParam @@ -82,9 +64,7 @@ TEST_P(extractKVPairKeyValuePairExtractorTest, Match) auto values = ColumnString::create(); auto pairs_found = kv_parser->extract(input, keys, values); - ASSERT_EQ(expected.size(), pairs_found) - << "\texpected: " << print_with_dump(expected) << "\n" - << "\tactual : " << print_with_dump(*ToColumnMap(keys, values)); + ASSERT_EQ(expected.size(), pairs_found); size_t i = 0; for (const auto & expected_kv : expected) diff --git a/src/Functions/keyvaluepair/tests/gtest_inline_escaping_key_state_handler.cpp b/src/Functions/keyvaluepair/tests/gtest_inline_escaping_key_state_handler.cpp index afffb9f6108..c8fe5874281 100644 --- a/src/Functions/keyvaluepair/tests/gtest_inline_escaping_key_state_handler.cpp +++ b/src/Functions/keyvaluepair/tests/gtest_inline_escaping_key_state_handler.cpp @@ -84,7 +84,6 @@ TEST(extractKVPairInlineEscapingKeyStateHandler, Read) std::string key_str = "name"; std::string key_with_delimiter_str = key_str + ':'; - std::string key_with_delimiter_and_left_spacing = " " + key_with_delimiter_str; std::string key_with_delimiter_and_random_characters_str = key_str + ':' + "a$a\\:''\""; // no delimiter, should discard diff --git a/src/Functions/keyvaluepair/tests/gtest_no_escaping_key_state_handler.cpp b/src/Functions/keyvaluepair/tests/gtest_no_escaping_key_state_handler.cpp index c849fa56d9f..c4a3feed63e 100644 --- a/src/Functions/keyvaluepair/tests/gtest_no_escaping_key_state_handler.cpp +++ b/src/Functions/keyvaluepair/tests/gtest_no_escaping_key_state_handler.cpp @@ -87,7 +87,6 @@ TEST(extractKVPairNoEscapingKeyStateHandler, Read) std::string key_str = "name"; std::string key_with_delimiter_str = key_str + ':'; - std::string key_with_delimiter_and_left_spacing = " " + key_with_delimiter_str; std::string key_with_delimiter_and_random_characters_str = key_str + ':' + "a$a\\:''\""; // no delimiter, should discard @@ -101,4 +100,3 @@ TEST(extractKVPairNoEscapingKeyStateHandler, Read) test_read(handler, "", "", 0u, State::END); } - diff --git a/src/Functions/kostikConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp index 47a9a928976..42004ed40d9 100644 --- a/src/Functions/kostikConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -17,7 +17,7 @@ struct KostikConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = 32768; - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return ConsistentHashing(hash, n); } diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index f5680d4d468..babb8378d80 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -15,7 +15,7 @@ struct LeastBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { /** gcc 4.9.2 successfully vectorizes a loop from this function. 
*/ return static_cast(a) < static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct LeastBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct LeastSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::lessOp(a, b) ? static_cast(a) : static_cast(b); diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 8794283a856..3d8b8617472 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -679,7 +679,7 @@ public: Columns converted_arguments = convertMandatoryArguments(arguments, mandatory_argument_names); - auto res_column = ColumnDateTime64::create(input_rows_count, static_cast(precision)); + auto res_column = ColumnDateTime64::create(input_rows_count, precision); auto & result_data = res_column->getData(); const auto & yyyymmddhhmmss_data = typeid_cast(*converted_arguments[0]).getData(); diff --git a/src/Functions/mathConstants.cpp b/src/Functions/mathConstants.cpp index c7eb37289ac..2b199a30616 100644 --- a/src/Functions/mathConstants.cpp +++ b/src/Functions/mathConstants.cpp @@ -1,6 +1,9 @@ #include #include +#include + + namespace DB { @@ -19,7 +22,7 @@ namespace struct EImpl { static constexpr char name[] = "e"; - static constexpr double value = 2.7182818284590452353602874713526624977572470; + static constexpr double value = std::numbers::e; }; using FunctionE = FunctionMathConstFloat64; @@ -28,7 +31,7 @@ namespace struct PiImpl { static constexpr char name[] = "pi"; - static constexpr double value = 3.1415926535897932384626433832795028841971693; + static constexpr double value = std::numbers::pi; }; using FunctionPi = FunctionMathConstFloat64; diff --git a/src/Functions/minSampleSize.cpp b/src/Functions/minSampleSize.cpp index a5826ef5c0e..f37b030c85a 100644 --- a/src/Functions/minSampleSize.cpp +++ b/src/Functions/minSampleSize.cpp @@ -102,14 +102,14 @@ struct ContinuousImpl auto baseline_argument = arguments[0]; baseline_argument.column = baseline_argument.column->convertToFullColumnIfConst(); auto baseline_column_untyped = castColumnAccurate(baseline_argument, float_64_type); - const auto * baseline_column = checkAndGetColumn>(*baseline_column_untyped); - const auto & baseline_column_data = baseline_column->getData(); + const auto & baseline_column = checkAndGetColumn>(*baseline_column_untyped); + const auto & baseline_column_data = baseline_column.getData(); auto sigma_argument = arguments[1]; sigma_argument.column = sigma_argument.column->convertToFullColumnIfConst(); auto sigma_column_untyped = castColumnAccurate(sigma_argument, float_64_type); - const auto * sigma_column = checkAndGetColumn>(*sigma_column_untyped); - const auto & sigma_column_data = sigma_column->getData(); + const auto & sigma_column = checkAndGetColumn>(*sigma_column_untyped); + const auto & sigma_column_data = sigma_column.getData(); const IColumn & col_mde = *arguments[2].column; const IColumn & col_power = *arguments[3].column; diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 04877a42b18..f3b9b8a7bcb 100644 --- a/src/Functions/minus.cpp +++ 
b/src/Functions/minus.cpp @@ -13,7 +13,7 @@ struct MinusImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -28,7 +28,7 @@ struct MinusImpl /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::subOverflow(static_cast(a), b, c); } @@ -36,7 +36,7 @@ struct MinusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? b.CreateSub(left, right) : b.CreateFSub(left, right); } diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index cbc2ec2cd0a..ebc1c4f5275 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -105,7 +105,7 @@ struct ModuloByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 3551ae74c5f..cd7873b3b9e 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -15,7 +15,7 @@ struct ModuloOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if constexpr (std::is_floating_point_v) { diff --git a/src/Functions/mortonDecode.cpp b/src/Functions/mortonDecode.cpp index f65f38fb097..2b7b7b4f2e7 100644 --- a/src/Functions/mortonDecode.cpp +++ b/src/Functions/mortonDecode.cpp @@ -1,10 +1,11 @@ -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -15,13 +16,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; - extern const int ARGUMENT_OUT_OF_BOUND; -} - // NOLINTBEGIN(bugprone-switch-missing-default-case) #define EXTRACT_VECTOR(INDEX) \ @@ -186,7 +180,7 @@ constexpr auto MortonND_5D_Dec = mortonnd::MortonNDLutDecoder<5, 12, 8>(); constexpr auto MortonND_6D_Dec = mortonnd::MortonNDLutDecoder<6, 10, 8>(); constexpr auto MortonND_7D_Dec = mortonnd::MortonNDLutDecoder<7, 9, 8>(); constexpr auto MortonND_8D_Dec = mortonnd::MortonNDLutDecoder<8, 8, 8>(); -class FunctionMortonDecode : public IFunction +class FunctionMortonDecode : public FunctionSpaceFillingCurveDecode<8, 1, 8> { public: static constexpr auto name = "mortonDecode"; @@ -200,68 +194,6 @@ public: return name; } - size_t getNumberOfArguments() const override - { - return 2; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - UInt64 tuple_size = 0; - const auto * 
col_const = typeid_cast(arguments[0].column.get()); - if (!col_const) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column type {} of function {}, should be a constant (UInt or Tuple)", - arguments[0].type->getName(), getName()); - if (!WhichDataType(arguments[1].type).isNativeUInt()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column type {} of function {}, should be a native UInt", - arguments[1].type->getName(), getName()); - const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); - if (mask) - { - tuple_size = mask->tupleSize(); - } - else if (WhichDataType(arguments[0].type).isNativeUInt()) - { - tuple_size = col_const->getUInt(0); - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column type {} of function {}, should be UInt or Tuple", - arguments[0].type->getName(), getName()); - if (tuple_size > 8 || tuple_size < 1) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "Illegal first argument for function {}, should be a number in range 1-8 or a Tuple of such size", - getName()); - if (mask) - { - const auto * type_tuple = typeid_cast(arguments[0].type.get()); - for (size_t i = 0; i < tuple_size; i++) - { - if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument in tuple for function {}, should be a native UInt", - type_tuple->getElement(i)->getName(), getName()); - auto ratio = mask->getColumn(i).getUInt(0); - if (ratio > 8 || ratio < 1) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "Illegal argument {} in tuple for function {}, should be a number in range 1-8", - ratio, getName()); - } - } - DataTypes types(tuple_size); - for (size_t i = 0; i < tuple_size; i++) - { - types[i] = std::make_shared(); - } - return std::make_shared(types); - } - static UInt64 shrink(UInt64 ratio, UInt64 value) { switch (ratio) // NOLINT(bugprone-switch-missing-default-case) diff --git a/src/Functions/mortonEncode.cpp b/src/Functions/mortonEncode.cpp index 3b95c114b14..0c19c7c3134 100644 --- a/src/Functions/mortonEncode.cpp +++ b/src/Functions/mortonEncode.cpp @@ -1,10 +1,9 @@ #include #include -#include -#include #include #include #include +#include #include #include @@ -19,7 +18,6 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } #define EXTRACT_VECTOR(INDEX) \ @@ -144,7 +142,7 @@ constexpr auto MortonND_5D_Enc = mortonnd::MortonNDLutEncoder<5, 12, 8>(); constexpr auto MortonND_6D_Enc = mortonnd::MortonNDLutEncoder<6, 10, 8>(); constexpr auto MortonND_7D_Enc = mortonnd::MortonNDLutEncoder<7, 9, 8>(); constexpr auto MortonND_8D_Enc = mortonnd::MortonNDLutEncoder<8, 8, 8>(); -class FunctionMortonEncode : public IFunction +class FunctionMortonEncode : public FunctionSpaceFillingCurveEncode { public: static constexpr auto name = "mortonEncode"; @@ -158,56 +156,6 @@ public: return name; } - bool isVariadic() const override - { - return true; - } - - size_t getNumberOfArguments() const override - { - return 0; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override - { - size_t vectorStartIndex = 0; - if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, - "At least one 
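For context on what mortonEncode/mortonDecode compute while their argument validation moves into the shared FunctionSpaceFillingCurve base classes: Morton (Z-order) encoding interleaves the bits of the coordinates. The sketch below is a plain bit loop for the 2-D case, not the LUT-based MortonNDLutEncoder the functions actually use.

#include <cstdint>
#include <iostream>

// Interleave the bits of x (even positions) and y (odd positions).
uint64_t mortonEncode2D(uint32_t x, uint32_t y)
{
    uint64_t result = 0;
    for (unsigned bit = 0; bit < 32; ++bit)
    {
        result |= static_cast<uint64_t>((x >> bit) & 1u) << (2 * bit);
        result |= static_cast<uint64_t>((y >> bit) & 1u) << (2 * bit + 1);
    }
    return result;
}

int main()
{
    std::cout << mortonEncode2D(3, 1) << '\n'; // x bits in even positions, y in odd: 0b0111 = 7
}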
UInt argument is required for function {}", - getName()); - if (WhichDataType(arguments[0]).isTuple()) - { - vectorStartIndex = 1; - const auto * type_tuple = typeid_cast(arguments[0].get()); - auto tuple_size = type_tuple->getElements().size(); - if (tuple_size != (arguments.size() - 1)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "Illegal argument {} for function {}, tuple size should be equal to number of UInt arguments", - arguments[0]->getName(), getName()); - for (size_t i = 0; i < tuple_size; i++) - { - if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument in tuple for function {}, should be a native UInt", - type_tuple->getElement(i)->getName(), getName()); - } - } - - for (size_t i = vectorStartIndex; i < arguments.size(); i++) - { - const auto & arg = arguments[i]; - if (!WhichDataType(arg).isNativeUInt()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument of function {}, should be a native UInt", - arg->getName(), getName()); - } - return std::make_shared(); - } - static UInt64 expand(UInt64 ratio, UInt64 value) { switch (ratio) // NOLINT(bugprone-switch-missing-default-case) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 49c45d0c0be..d3bf5618f66 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -148,11 +148,6 @@ public: bool condition_always_true = false; bool condition_is_nullable = false; bool source_is_constant = false; - - bool condition_is_short = false; - bool source_is_short = false; - size_t condition_index = 0; - size_t source_index = 0; }; ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -198,7 +193,7 @@ public: if (cond_col->onlyNull()) continue; - if (const auto * column_const = checkAndGetColumn(*cond_col)) + if (const auto * column_const = checkAndGetColumn(&*cond_col)) { Field value = column_const->getField(); @@ -214,12 +209,9 @@ public: instruction.condition = cond_col; instruction.condition_is_nullable = instruction.condition->isNullable(); } - - instruction.condition_is_short = cond_col->size() < arguments[0].column->size(); } const ColumnWithTypeAndName & source_col = arguments[source_idx]; - instruction.source_is_short = source_col.column->size() < arguments[0].column->size(); if (source_col.type->equals(*return_type)) { instruction.source = source_col.column; @@ -250,19 +242,8 @@ public: return ColumnConst::create(std::move(res), instruction.source->size()); } - bool contains_short = false; - for (const auto & instruction : instructions) - { - if (instruction.condition_is_short || instruction.source_is_short) - { - contains_short = true; - break; - } - } - const WhichDataType which(removeNullable(result_type)); - bool execute_multiif_columnar = allow_execute_multiif_columnar && !contains_short - && instructions.size() <= std::numeric_limits::max() + bool execute_multiif_columnar = allow_execute_multiif_columnar && instructions.size() <= std::numeric_limits::max() && (which.isInt() || which.isUInt() || which.isFloat() || which.isDecimal() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isEnum() || which.isIPv4() || which.isIPv6()); @@ -339,25 +320,23 @@ private: { bool insert = false; - size_t condition_index = instruction.condition_is_short ? 
instruction.condition_index++ : i; if (instruction.condition_always_true) insert = true; else if (!instruction.condition_is_nullable) - insert = assert_cast(*instruction.condition).getData()[condition_index]; + insert = assert_cast(*instruction.condition).getData()[i]; else { const ColumnNullable & condition_nullable = assert_cast(*instruction.condition); const ColumnUInt8 & condition_nested = assert_cast(condition_nullable.getNestedColumn()); const NullMap & condition_null_map = condition_nullable.getNullMapData(); - insert = !condition_null_map[condition_index] && condition_nested.getData()[condition_index]; + insert = !condition_null_map[i] && condition_nested.getData()[i]; } if (insert) { - size_t source_index = instruction.source_is_short ? instruction.source_index++ : i; if (!instruction.source_is_constant) - res->insertFrom(*instruction.source, source_index); + res->insertFrom(*instruction.source, i); else res->insertFrom(assert_cast(*instruction.source).getDataColumn(), 0); diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 4dc8cd10f31..67b6fff6b58 100644 --- a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -14,7 +14,7 @@ struct MultiplyImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -29,7 +29,7 @@ struct MultiplyImpl /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { if constexpr (std::is_same_v || std::is_same_v) { @@ -43,7 +43,7 @@ struct MultiplyImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? b.CreateMul(left, right) : b.CreateFMul(left, right); } diff --git a/src/Functions/multiplyDecimal.cpp b/src/Functions/multiplyDecimal.cpp index ed6487c6683..7e30a893d72 100644 --- a/src/Functions/multiplyDecimal.cpp +++ b/src/Functions/multiplyDecimal.cpp @@ -17,7 +17,7 @@ struct MultiplyDecimalsImpl static constexpr auto name = "multiplyDecimal"; template - static inline Decimal256 + static Decimal256 execute(FirstType a, SecondType b, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) { if (a.value == 0 || b.value == 0) diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index bd47780dea8..2c9b461274d 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -11,7 +11,7 @@ struct NegateImpl using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfNegate::Type>; static constexpr const bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { return -static_cast(a); } @@ -19,7 +19,7 @@ struct NegateImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { return arg->getType()->isIntegerTy() ? 
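After the multiIf changes above drop the short-circuit bookkeeping (condition_is_short, source_is_short and the per-instruction indices), every condition and source column is full-size, so row i is read at index i everywhere. A toy model of that row-wise selection with made-up data, not the actual column classes:

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

int main()
{
    // Two (condition, source) branches plus an ELSE branch, three rows each.
    std::vector<std::vector<uint8_t>> conditions = {{1, 0, 0}, {0, 0, 1}};
    std::vector<std::vector<int>> sources = {{10, 11, 12}, {20, 21, 22}};
    std::vector<int> else_branch = {30, 31, 32};

    for (size_t i = 0; i < else_branch.size(); ++i)
    {
        std::optional<int> value;
        // First condition that holds at row i selects its source at the same row i.
        for (size_t c = 0; c < conditions.size() && !value; ++c)
            if (conditions[c][i])
                value = sources[c][i];
        std::cout << value.value_or(else_branch[i]) << ' '; // prints: 10 31 22
    }
    std::cout << '\n';
}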
b.CreateNeg(arg) : b.CreateFNeg(arg); } diff --git a/src/Functions/neighbor.cpp b/src/Functions/neighbor.cpp index abe6d39422d..62f129109f9 100644 --- a/src/Functions/neighbor.cpp +++ b/src/Functions/neighbor.cpp @@ -36,11 +36,11 @@ public: static FunctionPtr create(ContextPtr context) { - if (!context->getSettingsRef().allow_deprecated_functions) + if (!context->getSettingsRef().allow_deprecated_error_prone_window_functions) throw Exception( ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated since its usage is error-prone (see docs)." - "Please use proper window function or set `allow_deprecated_functions` setting to enable it", + "Please use proper window function or set `allow_deprecated_error_prone_window_functions` setting to enable it", name); return std::make_shared(); diff --git a/src/Functions/nested.cpp b/src/Functions/nested.cpp index 679bb4f73d8..bdaf57d65c9 100644 --- a/src/Functions/nested.cpp +++ b/src/Functions/nested.cpp @@ -18,9 +18,10 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_ARRAYS_DONT_MATCH; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } namespace @@ -64,19 +65,19 @@ public: { size_t arguments_size = arguments.size(); if (arguments_size < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} doesn't match: passed {}, should be at least 2", getName(), arguments_size); Names nested_names = extractNestedNames(arguments[0].column); if (nested_names.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument for function {} must be constant column with array of strings", getName()); if (nested_names.size() != arguments_size - 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Size of nested names array for function {} does not match arrays arguments size. Actual {}. 
Expected {}", getName(), nested_names.size(), diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index 0f1e8a04236..d6f8474c984 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -18,7 +18,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int CANNOT_CLOCK_GETTIME; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } namespace @@ -128,7 +128,7 @@ public: if (arguments.size() > 2) { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0, or 1, or 2", getName()); + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Arguments size of function {} should be 0, or 1, or 2", getName()); } if (!arguments.empty()) { diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp index 0d5f9c45780..74f420986c8 100644 --- a/src/Functions/nowInBlock.cpp +++ b/src/Functions/nowInBlock.cpp @@ -12,7 +12,7 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -63,7 +63,7 @@ public: { if (arguments.size() > 1) { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0 or 1", getName()); + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Arguments size of function {} should be 0 or 1", getName()); } if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type)) { diff --git a/src/Functions/padString.cpp b/src/Functions/padString.cpp index b26a4ec3d6a..8670c837e21 100644 --- a/src/Functions/padString.cpp +++ b/src/Functions/padString.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -84,8 +85,7 @@ namespace if (offset == pad_string.length()) break; offset += UTF8::seqLength(pad_string[offset]); - if (offset > pad_string.length()) - offset = pad_string.length(); + offset = std::min(offset, pad_string.length()); } } @@ -211,19 +211,18 @@ namespace pad_string = column_pad_const->getValue(); } - PaddingChars padding_chars{pad_string}; auto col_res = ColumnString::create(); StringSink res_sink{*col_res, input_rows_count}; if (const ColumnString * col = checkAndGetColumn(column_string.get())) - executeForSource(StringSource{*col}, column_length, padding_chars, res_sink); + executeForSource(StringSource{*col}, column_length, pad_string, res_sink); else if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_string.get())) - executeForSource(FixedStringSource{*col_fixed}, column_length, padding_chars, res_sink); + executeForSource(FixedStringSource{*col_fixed}, column_length, pad_string, res_sink); else if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) - executeForSource(ConstSource{*col_const}, column_length, padding_chars, res_sink); + executeForSource(ConstSource{*col_const}, column_length, pad_string, res_sink); else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst(column_string.get())) - executeForSource(ConstSource{*col_const_fixed}, column_length, padding_chars, res_sink); + executeForSource(ConstSource{*col_const_fixed}, column_length, pad_string, res_sink); else throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -236,23 +235,40 @@ namespace private: template - void executeForSource( - SourceStrings && strings, - const ColumnPtr & column_length, - const PaddingChars & padding_chars, - StringSink & res_sink) const + void 
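The padString rework in this area adds an ASCII fast path: when both the input and the pad string contain only bytes below 0x80, UTF-8 code points coincide with bytes, so the cheaper byte-oriented (non-UTF-8) implementation is already correct. A rough standalone sketch of the idea, with hypothetical helper names rather than the real isAllASCII/StringSource machinery:

#include <iostream>
#include <string>

bool isAllAsciiSketch(const std::string & s)
{
    for (unsigned char c : s)
        if (c >= 0x80)
            return false;
    return true;
}

// Byte-based left pad; valid for UTF-8 input only when everything is ASCII.
std::string leftPadAscii(const std::string & s, size_t new_length, char pad)
{
    return s.size() >= new_length ? s.substr(0, new_length)
                                  : std::string(new_length - s.size(), pad) + s;
}

int main()
{
    std::string s = "abc";
    if (isAllAsciiSketch(s))
        std::cout << leftPadAscii(s, 5, '*') << '\n'; // "**abc"
}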
executeForSource(SourceStrings && strings, const ColumnPtr & column_length, const String & pad_string, StringSink & res_sink) const { - if (const auto * col_const = checkAndGetColumn(column_length.get())) - executeForSourceAndLength(std::forward(strings), ConstSource{*col_const}, padding_chars, res_sink); + const auto & chars = strings.getElements(); + bool all_ascii = isAllASCII(reinterpret_cast(pad_string.data()), pad_string.size()) + && isAllASCII(chars.data(), chars.size()); + bool is_actually_utf8 = is_utf8 && !all_ascii; + + if (!is_actually_utf8) + { + PaddingChars padding_chars{pad_string}; + if (const auto * col_const = checkAndGetColumn(column_length.get())) + executeForSourceAndLength( + std::forward(strings), ConstSource{*col_const}, padding_chars, res_sink); + else + executeForSourceAndLength( + std::forward(strings), GenericValueSource{*column_length}, padding_chars, res_sink); + } else - executeForSourceAndLength(std::forward(strings), GenericValueSource{*column_length}, padding_chars, res_sink); + { + PaddingChars padding_chars{pad_string}; + if (const auto * col_const = checkAndGetColumn(column_length.get())) + executeForSourceAndLength( + std::forward(strings), ConstSource{*col_const}, padding_chars, res_sink); + else + executeForSourceAndLength( + std::forward(strings), GenericValueSource{*column_length}, padding_chars, res_sink); + } } - template + template void executeForSourceAndLength( SourceStrings && strings, SourceLengths && lengths, - const PaddingChars & padding_chars, + const PaddingChars & padding_chars, StringSink & res_sink) const { bool is_const_new_length = lengths.isConst(); @@ -264,7 +280,7 @@ namespace for (; !res_sink.isEnd(); res_sink.next(), strings.next(), lengths.next()) { auto str = strings.getWhole(); - ssize_t current_length = getLengthOfSlice(str); + ssize_t current_length = getLengthOfSlice(str); if (!res_sink.rowNum() || !is_const_new_length) { @@ -294,7 +310,7 @@ namespace } else if (new_length < current_length) { - str = removeSuffixFromSlice(str, current_length - new_length); + str = removeSuffixFromSlice(str, current_length - new_length); writeSlice(str, res_sink); } else if (new_length > current_length) diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index d9613120c94..11e210d2cc2 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -37,13 +37,13 @@ namespace { using Pos = const char *; - enum class ParseSyntax + enum class ParseSyntax : uint8_t { MySQL, Joda }; - enum class ErrorHandling + enum class ErrorHandling : uint8_t { Exception, Zero, @@ -713,7 +713,7 @@ namespace class Instruction { private: - enum class NeedCheckSpace + enum class NeedCheckSpace : uint8_t { Yes, No diff --git a/src/Functions/parseReadableSize.cpp b/src/Functions/parseReadableSize.cpp new file mode 100644 index 00000000000..f5c2c53439b --- /dev/null +++ b/src/Functions/parseReadableSize.cpp @@ -0,0 +1,325 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_PARSE_NUMBER; + extern const int CANNOT_PARSE_TEXT; + extern const int ILLEGAL_COLUMN; + extern const int UNEXPECTED_DATA_AFTER_PARSED_VALUE; +} + +enum class ErrorHandling : uint8_t +{ + Exception, + Zero, + Null +}; + +using ScaleFactors = std::unordered_map; + +/** 
parseReadableSize* - Returns the number of bytes corresponding to a given readable binary or decimal size. + * Examples: + * - `parseReadableSize('123 MiB')` + * - `parseReadableSize('123 MB')` + * Meant to be the inverse of `formatReadable*Size` with the following exceptions: + * - Number of bytes is returned as an unsigned integer amount instead of a float. Decimal points are rounded up to the nearest integer. + * - Negative numbers are not allowed as negative sizes don't make sense. + * Flavours: + * - parseReadableSize + * - parseReadableSizeOrNull + * - parseReadableSizeOrZero + */ +template +class FunctionParseReadable : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } + + String getName() const override { return name; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args + { + {"readable_size", static_cast(&isString), nullptr, "String"}, + }; + validateFunctionArgumentTypes(*this, arguments, args); + DataTypePtr return_type = std::make_shared(); + if constexpr (error_handling == ErrorHandling::Null) + return std::make_shared(return_type); + else + return return_type; + } + + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_str = checkAndGetColumn(arguments[0].column.get()); + if (!col_str) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first ('str') argument of function {}. 
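The three flavours registered by this new file differ only in how a parse failure is reported, which the error_handling template parameter selects. A compact sketch of that policy pattern, using std::stoull as a stand-in for the real readable-size parser:

#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

enum class ErrorHandling : uint8_t { Exception, Zero, Null };

template <ErrorHandling mode>
std::optional<uint64_t> parseOr(const std::string & s)
{
    try
    {
        return std::stoull(s); // stand-in for the real parser
    }
    catch (...)
    {
        if constexpr (mode == ErrorHandling::Exception)
            throw;                 // parseReadableSize
        else if constexpr (mode == ErrorHandling::Zero)
            return 0;              // parseReadableSizeOrZero
        else
            return std::nullopt;   // parseReadableSizeOrNull
    }
}

int main()
{
    std::cout << parseOr<ErrorHandling::Zero>("oops").value() << '\n';     // 0
    std::cout << parseOr<ErrorHandling::Null>("oops").has_value() << '\n'; // 0, i.e. NULL
    std::cout << parseOr<ErrorHandling::Exception>("42").value() << '\n';  // 42
}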
Must be string.", + arguments[0].column->getName(), + getName() + ); + } + + auto col_res = ColumnUInt64::create(input_rows_count); + + ColumnUInt8::MutablePtr col_null_map; + if constexpr (error_handling == ErrorHandling::Null) + col_null_map = ColumnUInt8::create(input_rows_count, 0); + + auto & res_data = col_res->getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + std::string_view value = col_str->getDataAt(i).toView(); + try + { + UInt64 num_bytes = parseReadableFormat(value); + res_data[i] = num_bytes; + } + catch (const Exception &) + { + if constexpr (error_handling == ErrorHandling::Exception) + { + throw; + } + else + { + res_data[i] = 0; + if constexpr (error_handling == ErrorHandling::Null) + col_null_map->getData()[i] = 1; + } + } + } + if constexpr (error_handling == ErrorHandling::Null) + return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); + else + return col_res; + } + +private: + + UInt64 parseReadableFormat(const std::string_view & value) const + { + static const ScaleFactors scale_factors = + { + {"b", 1ull}, + // ISO/IEC 80000-13 binary units + {"kib", 1024ull}, + {"mib", 1024ull * 1024ull}, + {"gib", 1024ull * 1024ull * 1024ull}, + {"tib", 1024ull * 1024ull * 1024ull * 1024ull}, + {"pib", 1024ull * 1024ull * 1024ull * 1024ull * 1024ull}, + {"eib", 1024ull * 1024ull * 1024ull * 1024ull * 1024ull * 1024ull}, + // Decimal units + {"kb", 1000ull}, + {"mb", 1000ull * 1000ull}, + {"gb", 1000ull * 1000ull * 1000ull}, + {"tb", 1000ull * 1000ull * 1000ull * 1000ull}, + {"pb", 1000ull * 1000ull * 1000ull * 1000ull * 1000ull}, + {"eb", 1000ull * 1000ull * 1000ull * 1000ull * 1000ull * 1000ull}, + }; + ReadBufferFromString buf(value); + + // tryReadFloatText does seem to not raise any error when there is leading whitespace so we check it explicitly + skipWhitespaceIfAny(buf); + if (buf.getPosition() > 0) + { + throw Exception( + ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, + "Invalid expression for function {} - Leading whitespace is not allowed (\"{}\")", + getName(), + value + ); + } + + Float64 base = 0; + if (!tryReadFloatTextPrecise(base, buf)) // If we use the default (fast) tryReadFloatText this returns True on garbage input so we use the Precise version + { + throw Exception( + ErrorCodes::CANNOT_PARSE_NUMBER, + "Invalid expression for function {} - Unable to parse readable size numeric component (\"{}\")", + getName(), + value + ); + } + else if (std::isnan(base) || !std::isfinite(base)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {} - Invalid numeric component: {}", + getName(), + base + ); + } + else if (base < 0) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {} - Negative sizes are not allowed ({})", + getName(), + base + ); + } + + skipWhitespaceIfAny(buf); + + String unit; + readStringUntilWhitespace(unit, buf); + boost::algorithm::to_lower(unit); + auto iter = scale_factors.find(unit); + if (iter == scale_factors.end()) + { + throw Exception( + ErrorCodes::CANNOT_PARSE_TEXT, + "Invalid expression for function {} - Unknown readable size unit (\"{}\")", + getName(), + unit + ); + } + else if (!buf.eof()) + { + throw Exception( + ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE, + "Invalid expression for function {} - Found trailing characters after readable size string (\"{}\")", + getName(), + value + ); + } + + Float64 num_bytes_with_decimals = base * iter->second; + if (num_bytes_with_decimals > std::numeric_limits::max()) + { + throw 
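A condensed, self-contained sketch of the parsing strategy in parseReadableFormat above: read a non-negative floating-point value, read the unit, look it up in a scale table, and round the product up to whole bytes. Error handling is collapsed into std::optional and the scale table is truncated; the real function additionally rejects leading whitespace, NaN/infinite values, and trailing characters.

#include <cctype>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <optional>
#include <sstream>
#include <string>
#include <unordered_map>

std::optional<uint64_t> parseReadableSizeSketch(const std::string & input)
{
    static const std::unordered_map<std::string, uint64_t> scale = {
        {"b", 1ULL}, {"kib", 1024ULL}, {"mib", 1024ULL * 1024},
        {"kb", 1000ULL}, {"mb", 1000ULL * 1000}};

    std::istringstream buf(input);
    double base = 0;
    std::string unit;
    if (!(buf >> base) || base < 0 || !(buf >> unit))
        return std::nullopt;

    for (auto & c : unit)
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));

    auto it = scale.find(unit);
    if (it == scale.end())
        return std::nullopt;

    // Round up so the returned byte count can hold the requested size.
    return static_cast<uint64_t>(std::ceil(base * static_cast<double>(it->second)));
}

int main()
{
    std::cout << *parseReadableSizeSketch("5.314 KiB") << '\n'; // 5442
}

For the documented example, "5.314 KiB" yields ceil(5.314 * 1024) = 5442, matching the table in the FunctionDocumentation below.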
Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid expression for function {} - Result is too big for output type (\"{}\")", + getName(), + num_bytes_with_decimals + ); + } + // As the input might be an arbitrary decimal number we might end up with a non-integer amount of bytes when parsing binary (eg MiB) units. + // This doesn't make sense so we round up to indicate the byte size that can fit the passed size. + return static_cast(std::ceil(num_bytes_with_decimals)); + } +}; + +struct NameParseReadableSize +{ + static constexpr auto name = "parseReadableSize"; +}; + +struct NameParseReadableSizeOrNull +{ + static constexpr auto name = "parseReadableSizeOrNull"; +}; + +struct NameParseReadableSizeOrZero +{ + static constexpr auto name = "parseReadableSizeOrZero"; +}; + +using FunctionParseReadableSize = FunctionParseReadable; +using FunctionParseReadableSizeOrNull = FunctionParseReadable; +using FunctionParseReadableSizeOrZero = FunctionParseReadable; + +FunctionDocumentation parseReadableSize_documentation { + .description = "Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it throws an exception.", + .syntax = "parseReadableSize(x)", + .arguments = {{"x", "Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md))"}}, + .returned_value = "Number of bytes, rounded up to the nearest integer ([UInt64](../../sql-reference/data-types/int-uint.md))", + .examples = { + { + "basic", + "SELECT arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB']) AS readable_sizes, parseReadableSize(readable_sizes) AS sizes;", + R"( +┌─readable_sizes─┬───sizes─┐ +│ 1 B │ 1 │ +│ 1 KiB │ 1024 │ +│ 3 MB │ 3000000 │ +│ 5.314 KiB │ 5442 │ +└────────────────┴─────────┘)" + }, + }, + .categories = {"OtherFunctions"}, +}; + +FunctionDocumentation parseReadableSizeOrNull_documentation { + .description = "Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `NULL`", + .syntax = "parseReadableSizeOrNull(x)", + .arguments = {{"x", "Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md))"}}, + .returned_value = "Number of bytes, rounded up to the nearest integer, or NULL if unable to parse the input (Nullable([UInt64](../../sql-reference/data-types/int-uint.md)))", + .examples = { + { + "basic", + "SELECT arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes, parseReadableSizeOrNull(readable_sizes) AS sizes;", + R"( +┌─readable_sizes─┬───sizes─┐ +│ 1 B │ 1 │ +│ 1 KiB │ 1024 │ +│ 3 MB │ 3000000 │ +│ 5.314 KiB │ 5442 │ +│ invalid │ ᴺᵁᴸᴸ │ +└────────────────┴─────────┘)" + }, + }, + .categories = {"OtherFunctions"}, +}; + +FunctionDocumentation parseReadableSizeOrZero_documentation { + .description = "Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. 
If the function is unable to parse the input value, it returns `0`", + .syntax = "parseReadableSizeOrZero(x)", + .arguments = {{"x", "Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md))"}}, + .returned_value = "Number of bytes, rounded up to the nearest integer, or 0 if unable to parse the input ([UInt64](../../sql-reference/data-types/int-uint.md))", + .examples = { + { + "basic", + "SELECT arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes, parseReadableSizeOrZero(readable_sizes) AS sizes;", + R"( +┌─readable_sizes─┬───sizes─┐ +│ 1 B │ 1 │ +│ 1 KiB │ 1024 │ +│ 3 MB │ 3000000 │ +│ 5.314 KiB │ 5442 │ +│ invalid │ 0 │ +└────────────────┴─────────┘)", + }, + }, + .categories = {"OtherFunctions"}, +}; + +REGISTER_FUNCTION(ParseReadableSize) +{ + factory.registerFunction(parseReadableSize_documentation); + factory.registerFunction(parseReadableSizeOrNull_documentation); + factory.registerFunction(parseReadableSizeOrZero_documentation); +} +} diff --git a/src/Functions/parseTimeDelta.cpp b/src/Functions/parseTimeDelta.cpp index 7743a0cb664..44eeb1a289f 100644 --- a/src/Functions/parseTimeDelta.cpp +++ b/src/Functions/parseTimeDelta.cpp @@ -11,7 +11,8 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; } @@ -117,14 +118,14 @@ namespace { if (arguments.empty()) throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()); if (arguments.size() > 1) throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()); diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index cd9cf6cec5c..ffb0fe2ade7 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -14,7 +14,7 @@ struct PlusImpl static const constexpr bool is_commutative = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { /// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1). if constexpr (is_big_int_v || is_big_int_v) @@ -30,7 +30,7 @@ struct PlusImpl /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::addOverflow(static_cast(a), b, c); } @@ -38,7 +38,7 @@ struct PlusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? 
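The apply(A, B, Result &) overloads in plus.cpp and minus.cpp delegate to common::addOverflow / common::subOverflow and report overflow through the return value. A minimal sketch of that shape using the GCC/Clang __builtin_add_overflow intrinsic; the actual helpers in base/ may be implemented differently:

#include <cstdint>
#include <iostream>

// Returns true when the addition overflowed, mirroring the apply(a, b, c) contract above.
bool addOverflowSketch(int64_t a, int64_t b, int64_t & result)
{
    return __builtin_add_overflow(a, b, &result);
}

int main()
{
    int64_t r = 0;
    std::cout << addOverflowSketch(INT64_MAX, 1, r) << '\n';     // 1 (overflowed)
    std::cout << addOverflowSketch(1, 2, r) << ' ' << r << '\n'; // 0 3
}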
b.CreateAdd(left, right) : b.CreateFAdd(left, right); } diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 0e4467a8210..6b413829bd1 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -7,15 +7,12 @@ #include #include -#include -#include #include #include #include #include #include #include -#include #include #include #include @@ -37,7 +34,7 @@ namespace DB { namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; @@ -87,7 +84,7 @@ public: { if (arguments.size() < 2) { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least 2 arguments", getName()); + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} requires at least 2 arguments", getName()); } /** We allow function invocation in one of the following forms: diff --git a/src/Functions/polygonArea.cpp b/src/Functions/polygonArea.cpp index 1c4ef9f79a3..facb37619ff 100644 --- a/src/Functions/polygonArea.cpp +++ b/src/Functions/polygonArea.cpp @@ -3,16 +3,9 @@ #include #include -#include -#include - -#include #include #include -#include -#include -#include #include #include diff --git a/src/Functions/polygonConvexHull.cpp b/src/Functions/polygonConvexHull.cpp index 921c0700ca7..f19589bcc07 100644 --- a/src/Functions/polygonConvexHull.cpp +++ b/src/Functions/polygonConvexHull.cpp @@ -3,16 +3,8 @@ #include #include -#include -#include - -#include #include -#include -#include -#include -#include #include #include diff --git a/src/Functions/polygonPerimeter.cpp b/src/Functions/polygonPerimeter.cpp index 85645118f84..0a255cec34b 100644 --- a/src/Functions/polygonPerimeter.cpp +++ b/src/Functions/polygonPerimeter.cpp @@ -3,15 +3,9 @@ #include #include -#include -#include - -#include #include #include -#include -#include #include #include diff --git a/src/Functions/polygonsDistance.cpp b/src/Functions/polygonsDistance.cpp index d6c7d799b5e..fdf1ef34ea6 100644 --- a/src/Functions/polygonsDistance.cpp +++ b/src/Functions/polygonsDistance.cpp @@ -3,21 +3,16 @@ #include #include -#include #include -#include #include #include #include #include -#include -#include -#include #include -#include + namespace DB { diff --git a/src/Functions/polygonsIntersection.cpp b/src/Functions/polygonsIntersection.cpp index 5777f438a19..77484e7e63c 100644 --- a/src/Functions/polygonsIntersection.cpp +++ b/src/Functions/polygonsIntersection.cpp @@ -3,20 +3,14 @@ #include #include -#include #include -#include #include #include -#include -#include -#include #include -#include -#include + namespace DB { diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp index 785a8f76ba6..194b7f2cfd7 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -3,19 +3,13 @@ #include #include -#include #include -#include #include #include -#include -#include -#include #include -#include namespace DB { diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index a31d223ea8c..37d865af50a 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -7,7 +7,6 @@ #include #include -#include #include diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index bf4db1cf9f8..35a9e17cdfd 100644 --- a/src/Functions/polygonsWithin.cpp +++ 
b/src/Functions/polygonsWithin.cpp @@ -3,21 +3,15 @@ #include #include -#include #include -#include #include #include #include #include -#include -#include -#include #include -#include namespace DB { diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp index 107302069b4..8004e3731b5 100644 --- a/src/Functions/punycode.cpp +++ b/src/Functions/punycode.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes /// Implementation of /// - punycodeEncode(), punycodeDecode() and tryPunycodeDecode(), see https://en.wikipedia.org/wiki/Punycode -enum class ErrorHandling +enum class ErrorHandling : uint8_t { Throw, /// Throw exception Empty /// Return empty string diff --git a/src/Functions/queryID.cpp b/src/Functions/queryID.cpp index 704206e1de5..5d0ac719797 100644 --- a/src/Functions/queryID.cpp +++ b/src/Functions/queryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionQueryID(const String & query_id_) : query_id(query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index 8dff297bcb1..ddc847b1ca5 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -51,14 +51,14 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { - const auto * column_string = checkAndGetColumn(arguments[0].column.get()); + const auto & column_string = checkAndGetColumn(*arguments[0].column); Serializer serializer; Geometry geometry; for (size_t i = 0; i < input_rows_count; ++i) { - const auto & str = column_string->getDataAt(i).toString(); + const auto & str = column_string.getDataAt(i).toString(); boost::geometry::read_wkt(str, geometry); serializer.add(geometry); } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 6f2078b7e48..7f2fe646062 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -22,14 +22,14 @@ namespace struct RepeatImpl { /// Safety threshold against DoS. 
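The checkRepeatTime/checkStringSize guards in the repeat.cpp hunk that follows cap the repeat count and the produced string size before anything is allocated, which is the whole DoS protection. A standalone sketch of the same guard pattern with the limits visible in the hunk (1'000'000 repetitions, 1 GiB of output); names are illustrative only:

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

std::string repeatGuarded(const std::string & s, uint64_t times)
{
    constexpr uint64_t max_repeat_times = 1'000'000;
    constexpr uint64_t max_string_size = 1ULL << 30; // 1 GiB

    // Validate before allocating anything.
    if (times > max_repeat_times)
        throw std::length_error("too many repetitions");
    if (s.size() * times > max_string_size)
        throw std::length_error("result string is too large");

    std::string result;
    result.reserve(s.size() * times);
    for (uint64_t i = 0; i < times; ++i)
        result += s;
    return result;
}

int main()
{
    std::cout << repeatGuarded("ab", 3) << '\n'; // ababab
}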
- static inline void checkRepeatTime(UInt64 repeat_time) + static void checkRepeatTime(UInt64 repeat_time) { static constexpr UInt64 max_repeat_times = 1'000'000; if (repeat_time > max_repeat_times) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too many times to repeat ({}), maximum is: {}", repeat_time, max_repeat_times); } - static inline void checkStringSize(UInt64 size) + static void checkStringSize(UInt64 size) { static constexpr UInt64 max_string_size = 1 << 30; if (size > max_string_size) @@ -238,9 +238,9 @@ public: { using DataType = std::decay_t; using T = typename DataType::FieldType; - const ColumnVector * column = checkAndGetColumn>(col_num.get()); + const ColumnVector & column = checkAndGetColumn>(*col_num); auto col_res = ColumnString::create(); - RepeatImpl::vectorStrVectorRepeat(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), column->getData()); + RepeatImpl::vectorStrVectorRepeat(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), column.getData()); res = std::move(col_res); return true; })) @@ -258,9 +258,9 @@ public: { using DataType = std::decay_t; using T = typename DataType::FieldType; - const ColumnVector * column = checkAndGetColumn>(col_num.get()); + const ColumnVector & column = checkAndGetColumn>(*col_num); auto col_res = ColumnString::create(); - RepeatImpl::constStrVectorRepeat(copy_str, col_res->getChars(), col_res->getOffsets(), column->getData()); + RepeatImpl::constStrVectorRepeat(copy_str, col_res->getChars(), col_res->getOffsets(), column.getData()); res = std::move(col_res); return true; })) diff --git a/src/Functions/reverse.cpp b/src/Functions/reverse.cpp index 32b998523c7..39608b77997 100644 --- a/src/Functions/reverse.cpp +++ b/src/Functions/reverse.cpp @@ -1,10 +1,10 @@ #include -#include #include #include #include #include #include +#include "reverse.h" namespace DB @@ -17,42 +17,6 @@ namespace ErrorCodes namespace { - -/** Reverse the string as a sequence of bytes. - */ -struct ReverseImpl -{ - static void vector(const ColumnString::Chars & data, - const ColumnString::Offsets & offsets, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - res_data.resize(data.size()); - res_offsets.assign(offsets); - size_t size = offsets.size(); - - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) - { - for (size_t j = prev_offset; j < offsets[i] - 1; ++j) - res_data[j] = data[offsets[i] + prev_offset - 2 - j]; - res_data[offsets[i] - 1] = 0; - prev_offset = offsets[i]; - } - } - - static void vectorFixed(const ColumnString::Chars & data, size_t n, ColumnString::Chars & res_data) - { - res_data.resize(data.size()); - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - for (size_t j = i * n; j < (i + 1) * n; ++j) - res_data[j] = data[(i * 2 + 1) * n - j - 1]; - } -}; - - class FunctionReverse : public IFunction { public: diff --git a/src/Functions/reverse.h b/src/Functions/reverse.h new file mode 100644 index 00000000000..5f999af4297 --- /dev/null +++ b/src/Functions/reverse.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +namespace DB +{ + +/** Reverse the string as a sequence of bytes. 
+ */ +struct ReverseImpl +{ + static void vector(const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + res_data.resize_exact(data.size()); + res_offsets.assign(offsets); + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + for (size_t j = prev_offset; j < offsets[i] - 1; ++j) + res_data[j] = data[offsets[i] + prev_offset - 2 - j]; + res_data[offsets[i] - 1] = 0; + prev_offset = offsets[i]; + } + } + + static void vectorFixed(const ColumnString::Chars & data, size_t n, ColumnString::Chars & res_data) + { + res_data.resize_exact(data.size()); + size_t size = data.size() / n; + + for (size_t i = 0; i < size; ++i) + for (size_t j = i * n; j < (i + 1) * n; ++j) + res_data[j] = data[(i * 2 + 1) * n - j - 1]; + } +}; + +} diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 8a76af05d86..1aee349fa8d 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -1,7 +1,10 @@ -#include #include +#include #include #include +#include +#include +#include "reverse.h" namespace DB @@ -25,10 +28,18 @@ struct ReverseUTF8Impl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { + bool all_ascii = isAllASCII(data.data(), data.size()); + if (all_ascii) + { + ReverseImpl::vector(data, offsets, res_data, res_offsets); + return; + } + res_data.resize(data.size()); res_offsets.assign(offsets); size_t size = offsets.size(); + ColumnString::Offset prev_offset = 0; for (size_t i = 0; i < size; ++i) { diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index cca92c19b0c..38eda9f3383 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -12,7 +12,7 @@ struct RoundAgeImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 18 ? 17 diff --git a/src/Functions/roundDuration.cpp b/src/Functions/roundDuration.cpp index 918f0b3425d..963080ba0d2 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -12,7 +12,7 @@ struct RoundDurationImpl using ResultType = UInt16; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 10 ? 1 diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index 607c67b742e..eb0df8884c5 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -65,7 +65,7 @@ struct RoundToExp2Impl using ResultType = T; static constexpr const bool allow_string_or_fixed_string = false; - static inline T apply(T x) + static T apply(T x) { return roundDownToPowerOfTwo(x); } diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index 9bf387d3357..d585affd91b 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -39,11 +39,11 @@ public: static FunctionPtr create(ContextPtr context) { - if (!context->getSettingsRef().allow_deprecated_functions) + if (!context->getSettingsRef().allow_deprecated_error_prone_window_functions) throw Exception( ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated since its usage is error-prone (see docs)." 
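The ReverseImpl moved into reverse.h above reverses each row of ClickHouse's flat chars + offsets string layout while keeping the trailing zero byte in place. A small standalone model of that layout using the same index arithmetic as the loop shown:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    // Two rows, "abc" and "de", each stored with a trailing 0 byte;
    // each offset points one past the row's terminating zero.
    std::vector<uint8_t> chars = {'a', 'b', 'c', 0, 'd', 'e', 0};
    std::vector<uint64_t> offsets = {4, 7};

    std::vector<uint8_t> res(chars.size());
    uint64_t prev = 0;
    for (size_t i = 0; i < offsets.size(); ++i)
    {
        // Mirror the row's bytes, then restore the terminating zero.
        for (uint64_t j = prev; j < offsets[i] - 1; ++j)
            res[j] = chars[offsets[i] + prev - 2 - j];
        res[offsets[i] - 1] = 0;
        prev = offsets[i];
    }

    std::cout << std::string(res.begin(), res.begin() + 3) << '\n';     // cba
    std::cout << std::string(res.begin() + 4, res.begin() + 6) << '\n'; // ed
}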
- "Please use proper window function or set `allow_deprecated_functions` setting to enable it", + "Please use proper window function or set `allow_deprecated_error_prone_window_functions` setting to enable it", name); return std::make_shared(); diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index d3704aa97ca..fe477d13744 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -139,11 +139,11 @@ public: static FunctionPtr create(ContextPtr context) { - if (!context->getSettingsRef().allow_deprecated_functions) + if (!context->getSettingsRef().allow_deprecated_error_prone_window_functions) throw Exception( ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated since its usage is error-prone (see docs)." - "Please use proper window function or set `allow_deprecated_functions` setting to enable it", + "Please use proper window function or set `allow_deprecated_error_prone_window_functions` setting to enable it", name); return std::make_shared>(); diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index da04d3b78d3..81fc904e16e 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -61,10 +61,10 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr col = arguments[0].column; - const ColumnArray * col_arr = checkAndGetColumn(col.get()); + const ColumnArray & col_arr = checkAndGetColumn(*col); - const IColumn & arr_data = col_arr->getData(); - const ColumnArray::Offsets & arr_offsets = col_arr->getOffsets(); + const IColumn & arr_data = col_arr.getData(); + const ColumnArray::Offsets & arr_offsets = col_arr.getOffsets(); ColumnPtr col_res; if (input_rows_count == 0) diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index fbaa2b14e64..e85b3a97c67 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -61,10 +61,10 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr array_ptr = arguments[0].column; - const ColumnArray * array = checkAndGetColumn(array_ptr.get()); + const ColumnArray & array = checkAndGetColumn(*array_ptr); - const IColumn & src_data = array->getData(); - const ColumnArray::Offsets & offsets = array->getOffsets(); + const IColumn & src_data = array.getData(); + const ColumnArray::Offsets & offsets = array.getOffsets(); auto res = ColumnFloat64::create(input_rows_count); auto & res_data = res->getData(); diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 6c849760eed..3dd2ac8e3aa 100644 --- a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -11,7 +11,7 @@ struct SignImpl using ResultType = Int8; static constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (is_decimal || std::is_floating_point_v) return a < A(0) ? -1 : a == A(0) ? 0 : 1; diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 84f08dd5440..22748e86888 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -34,7 +34,7 @@ namespace ErrorCodes /** sleep(seconds) - the specified number of seconds sleeps each columns. 
*/ -enum class FunctionSleepVariant +enum class FunctionSleepVariant : uint8_t { PerBlock, PerRow diff --git a/src/Functions/soundex.cpp b/src/Functions/soundex.cpp index 0cddfc90f7c..77ddb14a6ec 100644 --- a/src/Functions/soundex.cpp +++ b/src/Functions/soundex.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 03dc0d06719..83183c991bc 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -27,7 +27,7 @@ private: static constexpr auto space = ' '; /// Safety threshold against DoS. - static inline void checkRepeatTime(size_t repeat_time) + static void checkRepeatTime(size_t repeat_time) { static constexpr auto max_repeat_times = 1'000'000uz; if (repeat_time > max_repeat_times) @@ -57,14 +57,14 @@ public: template bool executeConstant(ColumnPtr col_times, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars) const { - const ColumnConst * col_times_const = checkAndGetColumn(col_times.get()); + const ColumnConst & col_times_const = checkAndGetColumn(*col_times); - const ColumnPtr & col_times_const_internal = col_times_const->getDataColumnPtr(); + const ColumnPtr & col_times_const_internal = col_times_const.getDataColumnPtr(); if (!checkAndGetColumn(col_times_const_internal.get())) return false; using T = typename DataType::FieldType; - T times = col_times_const->getValue(); + T times = col_times_const.getValue(); if (times < 1) times = 0; diff --git a/src/Functions/splitByChar.cpp b/src/Functions/splitByChar.cpp index d3d5dc9fe4a..52db5623b89 100644 --- a/src/Functions/splitByChar.cpp +++ b/src/Functions/splitByChar.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByNonAlpha.cpp b/src/Functions/splitByNonAlpha.cpp index 4486a33aa88..17ff6cfb0a8 100644 --- a/src/Functions/splitByNonAlpha.cpp +++ b/src/Functions/splitByNonAlpha.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 430089f14ee..042db97794d 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -1,9 +1,11 @@ #include +#include +#include #include #include -#include #include -#include +#include +#include #include @@ -102,7 +104,7 @@ public: return false; } - pos += 1; + ++pos; token_end = pos; ++splits; } @@ -148,11 +150,67 @@ public: using FunctionSplitByRegexp = FunctionTokens; +/// Fallback splitByRegexp to splitByChar when its 1st argument is a trivial char for better performance +class SplitByRegexpOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "splitByRegexp"; + static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } + + explicit SplitByRegexpOverloadResolver(ContextPtr context_) + : context(context_) + , split_by_regexp(FunctionSplitByRegexp::create(context)) {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return SplitByRegexpImpl::getNumberOfArguments(); } + bool isVariadic() const override { return SplitByRegexpImpl::isVariadic(); } + /// ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return SplitByRegexpImpl::getArgumentsThatAreAlwaysConstant(); } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + if (patternIsTrivialChar(arguments)) + return 
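Several enums in this diff (ParseSyntax, ErrorHandling, NeedCheckSpace, FunctionSleepVariant, and others) gain an explicit `: uint8_t` underlying type. The effect is purely on size: without a fixed base, a scoped enum is typically int-sized. A two-line demonstration:

#include <cstdint>
#include <iostream>

enum class DefaultSized { A, B };
enum class ByteSized : uint8_t { A, B };

int main()
{
    // Usually prints "4 1"; the exact default width is implementation-defined.
    std::cout << sizeof(DefaultSized) << ' ' << sizeof(ByteSized) << '\n';
}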
FunctionFactory::instance().getImpl("splitByChar", context)->build(arguments); + else + return std::make_unique( + split_by_regexp, collections::map(arguments, [](const auto & elem) { return elem.type; }), return_type); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return split_by_regexp->getReturnTypeImpl(arguments); + } + +private: + bool patternIsTrivialChar(const ColumnsWithTypeAndName & arguments) const + { + if (!arguments[0].column.get()) + return false; + const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get()); + if (!col) + return false; + + String pattern = col->getValue(); + if (pattern.size() == 1) + { + OptimizedRegularExpression re = Regexps::createRegexp(pattern); + + std::string required_substring; + bool is_trivial; + bool required_substring_is_prefix; + re.getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); + return is_trivial && required_substring == pattern; + } + return false; + } + + ContextPtr context; + FunctionPtr split_by_regexp; +}; } REGISTER_FUNCTION(SplitByRegexp) { - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/splitByString.cpp b/src/Functions/splitByString.cpp index 5c97f9841e7..e9b70a58eab 100644 --- a/src/Functions/splitByString.cpp +++ b/src/Functions/splitByString.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByWhitespace.cpp b/src/Functions/splitByWhitespace.cpp index cf21a218b15..5bf27f64c17 100644 --- a/src/Functions/splitByWhitespace.cpp +++ b/src/Functions/splitByWhitespace.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index e809914f5f0..f1dea7db018 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -148,9 +149,23 @@ public: if constexpr (is_utf8) { if (const ColumnString * col = checkAndGetColumn(column_string.get())) - return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, UTF8StringSource(*col), input_rows_count); + { + bool all_ascii = isAllASCII(col->getChars().data(), col->getChars().size()); + if (all_ascii) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, StringSource(*col), input_rows_count); + else + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, UTF8StringSource(*col), input_rows_count); + } + if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) - return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + { + StringRef str_ref = col_const->getDataAt(0); + bool all_ascii = isAllASCII(reinterpret_cast(str_ref.data), str_ref.size); + if (all_ascii) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + else + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + } throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); } 
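The SplitByRegexpOverloadResolver above redirects splitByRegexp to splitByChar when the constant pattern is a single character that the regexp engine reports as trivial. The sketch below approximates that check with a metacharacter list instead of the OptimizedRegularExpression analysis, and shows the cheaper character split it falls back to:

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Approximation: a one-character pattern with no regex meaning can be split on directly.
bool isTrivialCharPattern(const std::string & pattern)
{
    static const char * metacharacters = R"(\^$.[]|()?*+{})";
    return pattern.size() == 1 && std::strchr(metacharacters, pattern[0]) == nullptr;
}

std::vector<std::string> splitByCharSketch(const std::string & s, char sep)
{
    std::vector<std::string> result;
    size_t start = 0;
    for (size_t pos = s.find(sep); pos != std::string::npos; pos = s.find(sep, start))
    {
        result.push_back(s.substr(start, pos - start));
        start = pos + 1;
    }
    result.push_back(s.substr(start));
    return result;
}

int main()
{
    std::string pattern = ",";
    if (isTrivialCharPattern(pattern)) // true: ',' has no special regex meaning
        for (const auto & token : splitByCharSketch("a,b,c", pattern[0]))
            std::cout << token << '\n'; // a, b, c on separate lines
}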
else diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 5f3f054b624..15a321bd5b0 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -129,8 +130,10 @@ namespace res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); + bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); for (size_t i = 0; i < rows; ++i) { @@ -140,10 +143,12 @@ namespace StringRef res_ref; if constexpr (!is_utf8) res_ref = substringIndex(str_ref, delim[0], count); + else if (all_ascii) + res_ref = substringIndex(str_ref, delim[0], count); else res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); - appendToResultColumn(res_ref, res_data, res_offsets); + appendToResultColumn(res_ref, res_data, res_offsets); } } @@ -158,8 +163,10 @@ namespace res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); + bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); for (size_t i = 0; i < rows; ++i) { @@ -168,10 +175,12 @@ namespace StringRef res_ref; if constexpr (!is_utf8) res_ref = substringIndex(str_ref, delim[0], count); + else if (all_ascii) + res_ref = substringIndex(str_ref, delim[0], count); else res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); - appendToResultColumn(res_ref, res_data, res_offsets); + appendToResultColumn(res_ref, res_data, res_offsets); } } @@ -186,8 +195,10 @@ namespace res_data.reserve(str.size() * rows / 2); res_offsets.reserve(rows); + bool all_ascii = isAllASCII(reinterpret_cast(str.data()), str.size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + = !is_utf8 || all_ascii ? 
nullptr : std::make_unique(delim.data(), delim.size()); StringRef str_ref{str.data(), str.size()}; for (size_t i = 0; i < rows; ++i) @@ -197,18 +208,26 @@ namespace StringRef res_ref; if constexpr (!is_utf8) res_ref = substringIndex(str_ref, delim[0], count); + else if (all_ascii) + res_ref = substringIndex(str_ref, delim[0], count); else res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); - appendToResultColumn(res_ref, res_data, res_offsets); + appendToResultColumn(res_ref, res_data, res_offsets); } } + template static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { size_t res_offset = res_data.size(); res_data.resize(res_offset + res_ref.size + 1); - memcpy(&res_data[res_offset], res_ref.data, res_ref.size); + + if constexpr (padded) + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size); + else + memcpy(&res_data[res_offset], res_ref.data, res_ref.size); + res_offset += res_ref.size; res_data[res_offset] = 0; ++res_offset; diff --git a/src/Functions/tests/gtest_hilbert_curve.cpp b/src/Functions/tests/gtest_hilbert_curve.cpp new file mode 100644 index 00000000000..8e2c1b1b1aa --- /dev/null +++ b/src/Functions/tests/gtest_hilbert_curve.cpp @@ -0,0 +1,81 @@ +#include +#include "Functions/hilbertDecode2DLUT.h" +#include "Functions/hilbertEncode2DLUT.h" +#include "base/types.h" + + +TEST(HilbertLookupTable, EncodeBit1And3Consistency) +{ + const size_t bound = 1000; + for (size_t x = 0; x < bound; ++x) + { + for (size_t y = 0; y < bound; ++y) + { + auto hilbert1bit = DB::FunctionHilbertEncode2DWIthLookupTableImpl<1>::encode(x, y); + auto hilbert3bit = DB::FunctionHilbertEncode2DWIthLookupTableImpl<3>::encode(x, y); + ASSERT_EQ(hilbert1bit, hilbert3bit); + } + } +} + +TEST(HilbertLookupTable, EncodeBit2And3Consistency) +{ + const size_t bound = 1000; + for (size_t x = 0; x < bound; ++x) + { + for (size_t y = 0; y < bound; ++y) + { + auto hilbert2bit = DB::FunctionHilbertEncode2DWIthLookupTableImpl<2>::encode(x, y); + auto hilbert3bit = DB::FunctionHilbertEncode2DWIthLookupTableImpl<3>::encode(x, y); + ASSERT_EQ(hilbert3bit, hilbert2bit); + } + } +} + +TEST(HilbertLookupTable, DecodeBit1And3Consistency) +{ + const size_t bound = 1000 * 1000; + for (size_t hilbert_code = 0; hilbert_code < bound; ++hilbert_code) + { + auto res1 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<1>::decode(hilbert_code); + auto res3 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(hilbert_code); + ASSERT_EQ(res1, res3); + } +} + +TEST(HilbertLookupTable, DecodeBit2And3Consistency) +{ + const size_t bound = 1000 * 1000; + for (size_t hilbert_code = 0; hilbert_code < bound; ++hilbert_code) + { + auto res2 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<2>::decode(hilbert_code); + auto res3 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(hilbert_code); + ASSERT_EQ(res2, res3); + } +} + +TEST(HilbertLookupTable, DecodeAndEncodeAreInverseOperations) +{ + const size_t bound = 1000; + for (size_t x = 0; x < bound; ++x) + { + for (size_t y = 0; y < bound; ++y) + { + auto hilbert_code = DB::FunctionHilbertEncode2DWIthLookupTableImpl<3>::encode(x, y); + auto [x_new, y_new] = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(hilbert_code); + ASSERT_EQ(x_new, x); + ASSERT_EQ(y_new, y); + } + } +} + +TEST(HilbertLookupTable, EncodeAndDecodeAreInverseOperations) +{ + const size_t bound = 1000 * 1000; + for (size_t hilbert_code = 0; hilbert_code < bound; ++hilbert_code) + { 
+ auto [x, y] = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(hilbert_code); + auto hilbert_new = DB::FunctionHilbertEncode2DWIthLookupTableImpl<3>::encode(x, y); + ASSERT_EQ(hilbert_new, hilbert_code); + } +} diff --git a/src/Functions/toFixedString.h b/src/Functions/toFixedString.h index 9c7ffc48004..23cab2a65f5 100644 --- a/src/Functions/toFixedString.h +++ b/src/Functions/toFixedString.h @@ -21,7 +21,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -enum class ConvertToFixedStringExceptionMode +enum class ConvertToFixedStringExceptionMode : uint8_t { Throw, Null diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 7f25a317466..50442d1b448 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -52,12 +52,14 @@ public: }; const DataTypeInterval * interval_type = nullptr; - enum class ResultType + + enum class ResultType : uint8_t { Date, DateTime, DateTime64 }; + ResultType result_type; auto check_second_argument = [&] { @@ -164,7 +166,7 @@ private: if (isDateTime64(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) @@ -172,13 +174,13 @@ private: } else if (isDateTime(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } else if (isDate(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } diff --git a/src/Functions/tokenExtractors.cpp b/src/Functions/tokenExtractors.cpp index a29d759d2ca..e7dcb5cced3 100644 --- a/src/Functions/tokenExtractors.cpp +++ b/src/Functions/tokenExtractors.cpp @@ -116,7 +116,7 @@ public: private: template - inline void executeImpl( + void executeImpl( const ExtractorType & extractor, StringColumnType & input_data_column, ResultStringColumnType & result_data_column, diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index c7173909029..2df08a5664e 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/trim.cpp b/src/Functions/trim.cpp index dd51c606ff7..1f0011b8e99 100644 --- a/src/Functions/trim.cpp +++ b/src/Functions/trim.cpp @@ -46,8 +46,8 @@ public: ColumnString::Offsets & res_offsets) { size_t size = offsets.size(); - res_offsets.resize(size); - res_data.reserve(data.size()); + res_offsets.resize_exact(size); + res_data.reserve_exact(data.size()); size_t prev_offset = 0; size_t res_offset = 0; diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h index cc616f5df8a..8b3e041f781 100644 --- a/src/Functions/tuple.h +++ b/src/Functions/tuple.h @@ -10,11 +10,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - /** tuple(x, y, ...) is a function that allows you to group several columns * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. 
*/ @@ -45,14 +40,14 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); - return std::make_shared(arguments); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (arguments.empty()) + return ColumnTuple::create(input_rows_count); + size_t tuple_size = arguments.size(); Columns tuple_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index 2744a0dabb8..80d34083d9d 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -111,61 +112,15 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Variant or array of Variants. Actual {}", getName(), input_arg.type->getName()); - std::optional variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); + auto variant_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); - if (!variant_global_discr.has_value()) + if (!variant_discr) return arguments[2].column; - const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr); - const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr); - - /// If Variant has only NULLs or our variant doesn't have any real values, - /// just create column with default values and create null mask with 1. - if (input_col_as_variant->hasOnlyNulls() || variant_column->empty()) - { - auto res = variant_type->createColumn(); - - if (variant_type->lowCardinality()) - assert_cast(*res).nestedToNullable(); - - res->insertManyDefaults(input_col_as_variant->size()); - if (!variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); - - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.resize_fill(input_col_as_variant->size(), 1); - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(res), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - } - - /// If we extract single non-empty column and have no NULLs, then just return this variant. - if (auto non_empty_local_discr = input_col_as_variant->getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// If we were trying to extract some other variant, - /// it would be empty and we would already processed this case above. 
- chassert(input_col_as_variant->globalDiscriminatorByLocal(*non_empty_local_discr) == variant_global_discr); - return wrapInArraysAndConstIfNeeded(makeNullableOrLowCardinalityNullableSafe(variant_column), array_offsets, input_arg_is_const, input_rows_count); - } - - /// In general case we should calculate null-mask for variant - /// according to the discriminators column and expand - /// variant column by this mask to get a full column (with default values on NULLs) - const auto & local_discriminators = input_col_as_variant->getLocalDiscriminators(); - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.reserve(local_discriminators.size()); - auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); - for (auto local_discr : local_discriminators) - null_map_data.push_back(local_discr != variant_local_discr); - - auto expanded_variant_column = IColumn::mutate(variant_column); - if (variant_type->lowCardinality()) - expanded_variant_column = assert_cast(*expanded_variant_column).cloneNullable(); - expanded_variant_column->expand(null_map_data, /*inverted = */ true); - if (variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(expanded_variant_column), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - return wrapInArraysAndConstIfNeeded(std::move(expanded_variant_column), array_offsets, input_arg_is_const, input_rows_count); + auto variant_column = input_type_as_variant->getSubcolumn(input_type_as_variant->getVariant(*variant_discr)->getName(), input_col_as_variant->getPtr()); + return wrapInArraysAndConstIfNeeded(std::move(variant_column), array_offsets, input_arg_is_const, input_rows_count); } + private: std::optional getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const { @@ -175,20 +130,16 @@ private: "Second argument to {} with Variant argument must be a constant String", getName()); - String variant_element_name = name_col->getValue(); - auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name); - if (variant_element_type) + auto variant_element_name = name_col->getValue(); + if (auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name)) { - const auto & variants = variant_type.getVariants(); - for (size_t i = 0; i != variants.size(); ++i) - { - if (variants[i]->getName() == variant_element_type->getName()) - return i; - } + if (auto discr = variant_type.tryGetVariantDiscriminator(variant_element_type->getName())) + return discr; } if (argument_size == 2) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} doesn't contain variant with type {}", variant_type.getName(), variant_element_name); + return std::nullopt; } @@ -213,10 +164,10 @@ REGISTER_FUNCTION(VariantElement) Extracts a column with specified type from a `Variant` column. )", .syntax{"variantElement(variant, type_name, [, default_value])"}, - .arguments{{ + .arguments{ {"variant", "Variant column"}, {"type_name", "The name of the variant type to extract"}, - {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}}, + {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. 
Optional"}}, .examples{{{ "Example", R"( diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index de4a6fb0a5c..7a2598f5f4b 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -12,6 +12,7 @@ namespace DB { + namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -19,6 +20,36 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; } +namespace +{ + +/// Checks that passed data types are tuples and have the same size. +/// Returns size of tuples. +size_t checkAndGetTuplesSize(const DataTypePtr & lhs_type, const DataTypePtr & rhs_type, const String & function_name = {}) +{ + const auto * left_tuple = checkAndGetDataType(lhs_type.get()); + const auto * right_tuple = checkAndGetDataType(rhs_type.get()); + + if (!left_tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0{} should be tuple, got {}", + function_name.empty() ? "" : fmt::format(" of function {}", function_name), lhs_type->getName()); + + if (!right_tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1{}should be tuple, got {}", + function_name.empty() ? "" : fmt::format(" of function {}", function_name), rhs_type->getName()); + + const auto & left_types = left_tuple->getElements(); + const auto & right_types = right_tuple->getElements(); + + if (left_types.size() != right_types.size()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Expected tuples of the same size as arguments{}, got {} and {}", + function_name.empty() ? "" : fmt::format(" of function {}", function_name), lhs_type->getName(), rhs_type->getName()); + return left_types.size(); +} + +} + struct PlusName { static constexpr auto name = "plus"; }; struct MinusName { static constexpr auto name = "minus"; }; struct MultiplyName { static constexpr auto name = "multiply"; }; @@ -33,8 +64,7 @@ struct L2SquaredLabel { static constexpr auto name = "2Squared"; }; struct LinfLabel { static constexpr auto name = "inf"; }; struct LpLabel { static constexpr auto name = "p"; }; -/// str starts from the lowercase letter; not constexpr due to the compiler version -/*constexpr*/ std::string makeFirstLetterUppercase(const std::string& str) +constexpr std::string makeFirstLetterUppercase(const std::string & str) { std::string res(str); res[0] += 'A' - 'a'; @@ -57,35 +87,13 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - const auto * left_tuple = checkAndGetDataType(arguments[0].type.get()); - const auto * right_tuple = checkAndGetDataType(arguments[1].type.get()); + size_t tuple_size = checkAndGetTuplesSize(arguments[0].type, arguments[1].type, getName()); - if (!left_tuple) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuple, got {}", - getName(), arguments[0].type->getName()); + const auto & left_types = checkAndGetDataType(arguments[0].type.get())->getElements(); + const auto & right_types = checkAndGetDataType(arguments[1].type.get())->getElements(); - if (!right_tuple) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 1 of function {} should be tuple, got {}", - getName(), arguments[1].type->getName()); - - const auto & left_types = left_tuple->getElements(); - const auto & right_types = right_tuple->getElements(); - - Columns left_elements; - Columns right_elements; - if (arguments[0].column) - left_elements = getTupleElements(*arguments[0].column); - if (arguments[1].column) - right_elements = 
getTupleElements(*arguments[1].column); - - if (left_types.size() != right_types.size()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Expected tuples of the same size as arguments of function {}. Got {} and {}", - getName(), arguments[0].type->getName(), arguments[1].type->getName()); - - size_t tuple_size = left_types.size(); - if (tuple_size == 0) - return std::make_shared(); + Columns left_elements = arguments[0].column ? getTupleElements(*arguments[0].column) : Columns(); + Columns right_elements = arguments[1].column ? getTupleElements(*arguments[1].column) : Columns(); auto func = FunctionFactory::instance().get(FuncName::name, context); DataTypes types(tuple_size); @@ -119,7 +127,7 @@ public: size_t tuple_size = left_elements.size(); if (tuple_size == 0) - return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count); + return ColumnTuple::create(input_rows_count); auto func = FunctionFactory::instance().get(FuncName::name, context); Columns columns(tuple_size); @@ -177,9 +185,6 @@ public: cur_elements = getTupleElements(*arguments[0].column); size_t tuple_size = cur_types.size(); - if (tuple_size == 0) - return std::make_shared(); - auto negate = FunctionFactory::instance().get("negate", context); DataTypes types(tuple_size); for (size_t i = 0; i < tuple_size; ++i) @@ -197,7 +202,7 @@ public: } } - return std::make_shared(types); + return std::make_shared(std::move(types)); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -208,7 +213,7 @@ public: size_t tuple_size = cur_elements.size(); if (tuple_size == 0) - return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count); + return ColumnTuple::create(input_rows_count); auto negate = FunctionFactory::instance().get("negate", context); Columns columns(tuple_size); @@ -248,13 +253,9 @@ public: const auto & cur_types = cur_tuple->getElements(); - Columns cur_elements; - if (arguments[0].column) - cur_elements = getTupleElements(*arguments[0].column); + Columns cur_elements = arguments[0].column ? 
getTupleElements(*arguments[0].column) : Columns(); size_t tuple_size = cur_types.size(); - if (tuple_size == 0) - return std::make_shared(); const auto & p_column = arguments[1]; auto func = FunctionFactory::instance().get(FuncName::name, context); @@ -285,7 +286,7 @@ public: size_t tuple_size = cur_elements.size(); if (tuple_size == 0) - return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count); + return ColumnTuple::create(input_rows_count); const auto & p_column = arguments[1]; auto func = FunctionFactory::instance().get(FuncName::name, context); @@ -583,11 +584,14 @@ public: types = {arguments[0]}; } - const auto * interval_last = checkAndGetDataType(types.back().get()); - const auto * interval_new = checkAndGetDataType(arguments[1].get()); + if (!types.empty()) + { + const auto * interval_last = checkAndGetDataType(types.back().get()); + const auto * interval_new = checkAndGetDataType(arguments[1].get()); - if (!interval_last->equals(*interval_new)) - types.push_back(arguments[1]); + if (!interval_last->equals(*interval_new)) + types.push_back(arguments[1]); + } return std::make_shared(types); } @@ -632,14 +636,10 @@ public: size_t tuple_size = cur_elements.size(); if (tuple_size == 0) - { - can_be_merged = false; - } - else - { - const auto * tuple_last_interval = checkAndGetDataType(cur_types.back().get()); - can_be_merged = tuple_last_interval->equals(*second_interval); - } + return ColumnTuple::create(input_rows_count); + + const auto * tuple_last_interval = checkAndGetDataType(cur_types.back().get()); + can_be_merged = tuple_last_interval->equals(*second_interval); if (can_be_merged) tuple_columns.resize(tuple_size); @@ -726,9 +726,7 @@ public: const auto & cur_types = cur_tuple->getElements(); - Columns cur_elements; - if (arguments[0].column) - cur_elements = getTupleElements(*arguments[0].column); + Columns cur_elements = arguments[0].column ? 
getTupleElements(*arguments[0].column) : Columns(); size_t tuple_size = cur_types.size(); if (tuple_size == 0) @@ -1344,6 +1342,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { + size_t tuple_size = checkAndGetTuplesSize(arguments[0].type, arguments[1].type, getName()); + if (tuple_size == 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Result of function {} is undefined for empty tuples", getName()); + FunctionDotProduct dot(context); ColumnWithTypeAndName dot_result{dot.getReturnTypeImpl(arguments), {}}; @@ -1400,7 +1403,7 @@ public: divide_result.type, input_rows_count); auto minus_elem = minus->build({one, divide_result}); - return minus_elem->execute({one, divide_result}, minus_elem->getResultType(), {}); + return minus_elem->execute({one, divide_result}, minus_elem->getResultType(), input_rows_count); } }; diff --git a/src/Functions/ztest.cpp b/src/Functions/ztest.cpp index 55e1b59a897..c2579263674 100644 --- a/src/Functions/ztest.cpp +++ b/src/Functions/ztest.cpp @@ -98,23 +98,23 @@ public: static const auto uint64_data_type = std::make_shared>(); auto column_successes_x = castColumnAccurate(arguments[0], uint64_data_type); - const auto & data_successes_x = checkAndGetColumn>(column_successes_x.get())->getData(); + const auto & data_successes_x = checkAndGetColumn>(*column_successes_x).getData(); auto column_successes_y = castColumnAccurate(arguments[1], uint64_data_type); - const auto & data_successes_y = checkAndGetColumn>(column_successes_y.get())->getData(); + const auto & data_successes_y = checkAndGetColumn>(*column_successes_y).getData(); auto column_trials_x = castColumnAccurate(arguments[2], uint64_data_type); - const auto & data_trials_x = checkAndGetColumn>(column_trials_x.get())->getData(); + const auto & data_trials_x = checkAndGetColumn>(*column_trials_x).getData(); auto column_trials_y = castColumnAccurate(arguments[3], uint64_data_type); - const auto & data_trials_y = checkAndGetColumn>(column_trials_y.get())->getData(); + const auto & data_trials_y = checkAndGetColumn>(*column_trials_y).getData(); static const auto float64_data_type = std::make_shared>(); auto column_confidence_level = castColumnAccurate(arguments[4], float64_data_type); - const auto & data_confidence_level = checkAndGetColumn>(column_confidence_level.get())->getData(); + const auto & data_confidence_level = checkAndGetColumn>(*column_confidence_level).getData(); - String usevar = checkAndGetColumnConst(arguments[5].column.get())->getValue(); + String usevar = checkAndGetColumnConst(*arguments[5].column).getValue(); if (usevar != UNPOOLED && usevar != POOLED) throw Exception{ErrorCodes::BAD_ARGUMENTS, diff --git a/src/IO/Archives/LibArchiveReader.h b/src/IO/Archives/LibArchiveReader.h index 148d5dd17f2..b991cedcee5 100644 --- a/src/IO/Archives/LibArchiveReader.h +++ b/src/IO/Archives/LibArchiveReader.h @@ -2,7 +2,6 @@ #include #include -#include #include "config.h" diff --git a/src/IO/Archives/ZipArchiveWriter.h b/src/IO/Archives/ZipArchiveWriter.h index b2b77dce7e1..f4303e21f34 100644 --- a/src/IO/Archives/ZipArchiveWriter.h +++ b/src/IO/Archives/ZipArchiveWriter.h @@ -52,7 +52,7 @@ public: static constexpr const char kXz[] = "xz"; /// Some compression levels. 
- enum class CompressionLevels + enum class CompressionLevels : int8_t { kDefault = kDefaultCompressionLevel, kFast = 2, diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 769f1a184f6..8bd436f218c 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes { extern const int INVALID_CONFIG_PARAMETER; extern const int AZURE_BLOB_STORAGE_ERROR; + extern const int LOGICAL_ERROR; } namespace @@ -94,11 +95,56 @@ namespace void calculatePartSize() { - auto max_upload_part_size = settings->max_upload_part_size; - if (!max_upload_part_size) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be 0"); + if (!total_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. This must not happen"); + + auto max_part_number = settings->max_blocks_in_multipart_upload; + const auto min_upload_part_size = settings->min_upload_part_size; + const auto max_upload_part_size = settings->max_upload_part_size; + + if (!max_part_number) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_blocks_in_multipart_upload must not be 0"); + else if (!min_upload_part_size) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "min_upload_part_size must not be 0"); + else if (max_upload_part_size < min_upload_part_size) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be less than min_upload_part_size"); + + size_t part_size = min_upload_part_size; + auto num_parts = (total_size + part_size - 1) / part_size; + + if (num_parts > max_part_number) + { + part_size = (total_size + max_part_number - 1) / max_part_number; + num_parts = (total_size + part_size - 1) / part_size; + } + + if (part_size > max_upload_part_size) + { + part_size = max_upload_part_size; + num_parts = (total_size + part_size - 1) / part_size; + } + + String error; + if (num_parts < 1) + error = "Number of parts is zero"; + else if (num_parts > max_part_number) + error = fmt::format("Number of parts exceeds {}/{}", num_parts, max_part_number); + else if (part_size < min_upload_part_size) + error = fmt::format("Size of a part is less than {}/{}", part_size, min_upload_part_size); + else if (part_size > max_upload_part_size) + error = fmt::format("Size of a part exceeds {}/{}", part_size, max_upload_part_size); + + if (!error.empty()) + { + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, + "{} while writing {} bytes to Azure. Check max_part_number = {}, " + "min_upload_part_size = {}, max_upload_part_size = {}", + error, total_size, max_part_number, min_upload_part_size, max_upload_part_size); + } + /// We've calculated the size of a normal part (the final part can be smaller). - normal_part_size = max_upload_part_size; + normal_part_size = part_size; } public: @@ -219,21 +265,22 @@ namespace auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = std::make_unique(create_read_buffer(), task.part_offset, task.part_size); - while (!read_buffer->eof()) - { - auto size = read_buffer->available(); - if (size > 0) - { - auto block_id = getRandomASCIIString(64); - Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast(read_buffer->position()), size); - block_blob_client.StageBlock(block_id, memory); - task.block_ids.emplace_back(block_id); - read_buffer->ignore(size); - LOG_TRACE(log, "Writing part. 
Container: {}, Blob: {}, block_id: {}", dest_container_for_logging, dest_blob, block_id); - } - } - std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race - LOG_TRACE(log, "Writing part finished. Container: {}, Blob: {}, Parts: {}", dest_container_for_logging, dest_blob, bg_tasks.size()); + + /// task.part_size is already normalized according to min_upload_part_size and max_upload_part_size. + size_t size_to_stage = task.part_size; + + PODArray memory; + memory.resize(size_to_stage); + WriteBufferFromVector> wb(memory); + + copyData(*read_buffer, wb, size_to_stage); + Azure::Core::IO::MemoryBodyStream stream(reinterpret_cast(memory.data()), size_to_stage); + + const auto & block_id = task.block_ids.emplace_back(getRandomASCIIString(64)); + block_blob_client.StageBlock(block_id, stream); + + LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, block_id: {}, size: {}", + dest_container_for_logging, dest_blob, block_id, size_to_stage); } @@ -289,6 +336,7 @@ void copyAzureBlobStorageFile( if (settings->use_native_copy) { + LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->GetClickhouseOptions().IsClientForDisk) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); @@ -299,21 +347,32 @@ void copyAzureBlobStorageFile( if (size < settings->max_single_part_copy_size) { + LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy blob sync {} -> {}", src_blob, dest_blob); block_blob_client_dest.CopyFromUri(source_uri); } else { Azure::Storage::Blobs::StartBlobCopyOperation operation = block_blob_client_dest.StartCopyFromUri(source_uri); - // Wait for the operation to finish, checking for status every 100 second. 
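
The calculatePartSize() logic added above for the Azure copy path follows the same scheme the S3 uploader uses: start from min_upload_part_size, grow the part size if the part count would exceed the block limit, clamp to max_upload_part_size, and fail loudly when no valid layout exists. A self-contained sketch of that arithmetic (names are illustrative, not the ClickHouse settings structs):

    #include <cstddef>
    #include <iostream>
    #include <stdexcept>

    struct PartLayout
    {
        size_t part_size = 0;
        size_t num_parts = 0;
    };

    // Pick the smallest part size >= min_part that keeps the part count within
    // max_parts, then clamp to max_part and re-derive the count. Throws when the
    // limits cannot be satisfied, mirroring the INVALID_CONFIG_PARAMETER checks above.
    PartLayout calculatePartLayout(size_t total_size, size_t max_parts, size_t min_part, size_t max_part)
    {
        if (!total_size || !max_parts || !min_part || max_part < min_part)
            throw std::invalid_argument("invalid multipart upload settings");

        size_t part_size = min_part;
        size_t num_parts = (total_size + part_size - 1) / part_size;

        if (num_parts > max_parts)
        {
            part_size = (total_size + max_parts - 1) / max_parts;
            num_parts = (total_size + part_size - 1) / part_size;
        }

        if (part_size > max_part)
        {
            part_size = max_part;
            num_parts = (total_size + part_size - 1) / part_size;
        }

        if (num_parts > max_parts)
            throw std::invalid_argument("object is too large for the configured part limits");

        return {part_size, num_parts};
    }

    int main()
    {
        // 10 GiB object, at most 50000 blocks, parts between 16 MiB and 5 GiB.
        auto layout = calculatePartLayout(10ULL << 30, 50000, 16ULL << 20, 5ULL << 30);
        std::cout << layout.part_size << " bytes per part, " << layout.num_parts << " parts\n";
    }

Because each background task now stages exactly part_size bytes as a single block, the block count is known up front instead of depending on how much data happens to be available in the read buffer on each iteration.
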
auto copy_response = operation.PollUntilDone(std::chrono::milliseconds(100)); auto properties_model = copy_response.Value; - if (properties_model.CopySource.HasValue()) - { - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy failed"); - } + auto copy_status = properties_model.CopyStatus; + auto copy_status_description = properties_model.CopyStatusDescription; + + if (copy_status.HasValue() && copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success) + { + LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); + } + else + { + if (copy_status.HasValue()) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} failed with status {} description {} (operation is done {})", + src_blob, dest_blob, copy_status.Value().ToString(), copy_status_description.Value(), operation.IsDone()); + else + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} didn't complete with success status (operation is done {})", src_blob, dest_blob, operation.IsDone()); + } } } else @@ -321,8 +380,8 @@ void copyAzureBlobStorageFile( LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); auto create_read_buffer = [&] { - return std::make_unique(src_client, src_blob, read_settings, settings->max_single_read_retries, - settings->max_single_download_retries); + return std::make_unique( + src_client, src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); }; UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyAzureBlobStorageFile")}; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 6ad54923ab5..9c20ee4cff0 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -4,8 +4,7 @@ #if USE_AZURE_BLOB_STORAGE -#include -#include +#include #include #include #include diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index e98f00270e2..62fe011c0b6 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -37,13 +37,13 @@ public: { Buffer(Position begin_pos_, Position end_pos_) : begin_pos(begin_pos_), end_pos(end_pos_) {} - inline Position begin() const { return begin_pos; } - inline Position end() const { return end_pos; } - inline size_t size() const { return size_t(end_pos - begin_pos); } - inline void resize(size_t size) { end_pos = begin_pos + size; } - inline bool empty() const { return size() == 0; } + Position begin() const { return begin_pos; } + Position end() const { return end_pos; } + size_t size() const { return size_t(end_pos - begin_pos); } + void resize(size_t size) { end_pos = begin_pos + size; } + bool empty() const { return size() == 0; } - inline void swap(Buffer & other) noexcept + void swap(Buffer & other) noexcept { std::swap(begin_pos, other.begin_pos); std::swap(end_pos, other.end_pos); @@ -71,21 +71,21 @@ public: } /// get buffer - inline Buffer & internalBuffer() { return internal_buffer; } + Buffer & internalBuffer() { return internal_buffer; } /// get the part of the buffer from which you can read / write data - inline Buffer & buffer() { return working_buffer; } + Buffer & buffer() { return working_buffer; } /// get (for reading and modifying) the position in the buffer - inline Position & position() { 
return pos; } + Position & position() { return pos; } /// offset in bytes of the cursor from the beginning of the buffer - inline size_t offset() const { return size_t(pos - working_buffer.begin()); } + size_t offset() const { return size_t(pos - working_buffer.begin()); } /// How many bytes are available for read/write - inline size_t available() const { return size_t(working_buffer.end() - pos); } + size_t available() const { return size_t(working_buffer.end() - pos); } - inline void swap(BufferBase & other) noexcept + void swap(BufferBase & other) noexcept { internal_buffer.swap(other.internal_buffer); working_buffer.swap(other.working_buffer); diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index b8e1134d422..22913125e99 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -52,7 +52,6 @@ std::string toContentEncodingName(CompressionMethod method) case CompressionMethod::None: return ""; } - UNREACHABLE(); } CompressionMethod chooseHTTPCompressionMethod(const std::string & list) diff --git a/src/IO/CompressionMethod.h b/src/IO/CompressionMethod.h index c10c4901d41..b970f4780c4 100644 --- a/src/IO/CompressionMethod.h +++ b/src/IO/CompressionMethod.h @@ -15,7 +15,7 @@ class WriteBuffer; * (they use non-standard framing, indexes, checksums...) */ -enum class CompressionMethod +enum class CompressionMethod : uint8_t { None, /// DEFLATE compression with gzip header and CRC32 checksum. diff --git a/src/IO/FileEncryptionCommon.h b/src/IO/FileEncryptionCommon.h index 87aa1194273..d77dcfb4258 100644 --- a/src/IO/FileEncryptionCommon.h +++ b/src/IO/FileEncryptionCommon.h @@ -18,7 +18,7 @@ namespace FileEncryption /// We chose to use CTR cipther algorithms because they have the following features which are important for us: /// - No right padding, so we can append encrypted files without deciphering; /// - One byte is always ciphered as one byte, so we get random access to encrypted files easily. -enum class Algorithm +enum class Algorithm : uint8_t { AES_128_CTR, /// Size of key is 16 bytes. AES_192_CTR, /// Size of key is 24 bytes. 
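
Many hunks in this diff pin an explicit underlying type on scoped enums (CompressionMethod : uint8_t, Algorithm : uint8_t, CompressionLevels : int8_t, and others). Without a base type a scoped enum defaults to int, so small enums occupy four bytes wherever they are embedded in structs; a signed base such as int8_t is needed when an enumerator can be negative, presumably the case for the default compression level constant. A tiny illustration, assuming the usual ABI where int is four bytes:

    #include <cstdint>
    #include <iostream>

    enum class PlainEnum             { A, B, C };                       // underlying type defaults to int
    enum class SmallEnum  : uint8_t  { A, B, C };                       // one byte is enough for three values
    enum class SignedEnum : int8_t   { Default = -1, Fast = 2, Best = 9 };  // signed base allows -1

    int main()
    {
        std::cout << sizeof(PlainEnum) << ' ' << sizeof(SmallEnum) << ' ' << sizeof(SignedEnum) << '\n';
        // Typically prints "4 1 1".
    }
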
diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp index 41788fa8ce7..b5ac6a9b728 100644 --- a/src/IO/HTTPChunkedReadBuffer.cpp +++ b/src/IO/HTTPChunkedReadBuffer.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 6e1c886b9b0..9704d034b2a 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -48,7 +48,7 @@ HTTPSessionPtr makeHTTPSession( HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, - ProxyConfiguration proxy_configuration) + const ProxyConfiguration & proxy_configuration) { auto connection_pool = HTTPConnectionPools::instance().getPool(group, uri, proxy_configuration); return connection_pool->getConnection(timeouts); diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 63dffcf6878..3a1fa5bebee 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -61,7 +61,7 @@ HTTPSessionPtr makeHTTPSession( HTTPConnectionGroupType group, const Poco::URI & uri, const ConnectionTimeouts & timeouts, - ProxyConfiguration proxy_config = {} + const ProxyConfiguration & proxy_config = {} ); bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status); diff --git a/src/IO/HTTPHeaderEntries.h b/src/IO/HTTPHeaderEntries.h index 5862f1ead15..36b2ccc4ba5 100644 --- a/src/IO/HTTPHeaderEntries.h +++ b/src/IO/HTTPHeaderEntries.h @@ -10,7 +10,7 @@ struct HTTPHeaderEntry std::string value; HTTPHeaderEntry(const std::string & name_, const std::string & value_) : name(name_), value(value_) {} - inline bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } + bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } }; using HTTPHeaderEntries = std::vector; diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index b5fb1fec093..7d6e6db2fa7 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -23,7 +23,7 @@ namespace DB class HadoopSnappyDecoder { public: - enum class Status : int + enum class Status : uint8_t { OK = 0, INVALID_INPUT = 1, @@ -37,7 +37,7 @@ public: Status readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out); - inline void reset() + void reset() { buffer_length = 0; block_length = -1; @@ -73,7 +73,7 @@ class HadoopSnappyReadBuffer : public CompressedReadBufferWrapper public: using Status = HadoopSnappyDecoder::Status; - inline static String statusToString(Status status) + static String statusToString(Status status) { switch (status) { @@ -88,7 +88,6 @@ public: case Status::TOO_LARGE_COMPRESSED_BLOCK: return "TOO_LARGE_COMPRESSED_BLOCK"; } - UNREACHABLE(); } explicit HadoopSnappyReadBuffer( diff --git a/src/IO/IReadableWriteBuffer.h b/src/IO/IReadableWriteBuffer.h index dda5fc07c8e..db379fef969 100644 --- a/src/IO/IReadableWriteBuffer.h +++ b/src/IO/IReadableWriteBuffer.h @@ -8,7 +8,7 @@ namespace DB struct IReadableWriteBuffer { /// At the first time returns getReadBufferImpl(). Next calls return nullptr. 
- inline std::unique_ptr tryGetReadBuffer() + std::unique_ptr tryGetReadBuffer() { if (!can_reread) return nullptr; diff --git a/src/IO/MMappedFileDescriptor.cpp b/src/IO/MMappedFileDescriptor.cpp index ebc4e7a6bbb..a7eb8e4ede5 100644 --- a/src/IO/MMappedFileDescriptor.cpp +++ b/src/IO/MMappedFileDescriptor.cpp @@ -75,7 +75,7 @@ void MMappedFileDescriptor::set(int fd_, size_t offset_) { size_t file_size = getFileSize(fd_); - if (offset > static_cast(file_size)) + if (offset > file_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "MMappedFileDescriptor: requested offset is greater than file size"); set(fd_, offset_, file_size - offset); @@ -101,5 +101,3 @@ MMappedFileDescriptor::~MMappedFileDescriptor() } } - - diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index 5718830db64..e6771235a8e 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -73,8 +73,9 @@ bool ParallelReadBuffer::addReaderToPool() auto worker = read_workers.emplace_back(std::make_shared(input, range_start, size)); - ++active_working_readers; schedule([this, my_worker = std::move(worker)]() mutable { readerThreadFunction(std::move(my_worker)); }, Priority{}); + /// increase number of workers only after we are sure that the reader was scheduled + ++active_working_readers; return true; } diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index be650f2f3b4..d1838ce2c4f 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -283,9 +283,7 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append) { size_t pos_offset = pos - memory.data(); - size_t new_size_amortized = memory.size() * 2; - if (new_size_amortized < new_size) - new_size_amortized = new_size; + size_t new_size_amortized = std::max(memory.size() * 2, new_size); memory.resize(new_size_amortized); if (need_update_checkpoint) diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 2ee209ffd6c..e831956956f 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -83,9 +83,9 @@ private: bool peekNext(); - inline bool useSubbufferOnly() const { return !peeked_size; } - inline bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } - inline bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } + bool useSubbufferOnly() const { return !peeked_size; } + bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } + bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } void checkStateCorrect() const; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 056e25a5fbe..73f5335411f 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -85,7 +85,7 @@ public: } - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 57442a15853..76a80f145e7 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -173,7 +173,7 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end) return new_pos; - if (file_offset_of_buffer_end - working_buffer.size() <= static_cast(new_pos) + if (file_offset_of_buffer_end - working_buffer.size() <= new_pos && new_pos <= file_offset_of_buffer_end) { /// Position is still inside the buffer. 
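
The ReadBufferFromFileDescriptor::seek() hunk just above keeps the read position inside the already-buffered window when possible instead of dropping the buffer and issuing a new read. The reuse test boils down to a range check against the file offset of the buffer's end; a small sketch with illustrative names:

    #include <cstddef>

    // The buffer holds bytes [buffer_end_offset - buffer_size, buffer_end_offset) of the file.
    // A seek can be served from memory iff the target offset lies inside that window
    // (the end offset itself is allowed: it leaves the buffer empty but still valid).
    bool seekCanReuseBuffer(size_t buffer_end_offset, size_t buffer_size, size_t target_offset)
    {
        return buffer_end_offset - buffer_size <= target_offset
            && target_offset <= buffer_end_offset;
    }
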
diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 813546aa052..8823af55936 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -191,10 +191,14 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons result = sendRequest(attempt, range_begin, range_begin + n - 1); std::istream & istr = result->GetBody(); - copyFromIStreamWithProgressCallback(istr, to, n, progress_callback, &bytes_copied); + bool cancelled = false; + copyFromIStreamWithProgressCallback(istr, to, n, progress_callback, &bytes_copied, &cancelled); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, bytes_copied); + if (cancelled) + return initial_n - n + bytes_copied; + if (read_settings.remote_throttler) read_settings.remote_throttler->add(bytes_copied, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 68b61e96c51..c771fced73a 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -243,6 +243,18 @@ void readStringUntilWhitespace(String & s, ReadBuffer & buf) readStringUntilWhitespaceInto(s, buf); } +void readStringUntilAmpersand(String & s, ReadBuffer & buf) +{ + s.clear(); + readStringUntilCharsInto<'&'>(s, buf); +} + +void readStringUntilEquals(String & s, ReadBuffer & buf) +{ + s.clear(); + readStringUntilCharsInto<'='>(s, buf); +} + template void readNullTerminated>(PODArray & s, ReadBuffer & buf); template void readNullTerminated(String & s, ReadBuffer & buf); @@ -340,7 +352,6 @@ static ReturnType parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) { return error("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); } - s.push_back(unhex2(hex_code)); } else if (char_after_backslash == 'N') @@ -596,13 +607,20 @@ static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf, bool kee } -template +template void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) { while (!buf.eof()) { - char * next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); - + char * next_pos; + if constexpr (support_crlf) + { + next_pos = find_first_symbols<'\t', '\n', '\\','\r'>(buf.position(), buf.buffer().end()); + } + else + { + next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); + } appendToStringOrVector(s, buf, next_pos); buf.position() = next_pos; @@ -629,25 +647,46 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) } } } + + if constexpr (support_crlf) + { + if (*buf.position() == '\r') + { + ++buf.position(); + if (!buf.eof() && *buf.position() != '\n') + { + s.push_back('\r'); + continue; + } + return; + } + } } } -template +template void readEscapedStringInto(Vector & s, ReadBuffer & buf) { - readEscapedStringIntoImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } void readEscapedString(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringInto(s, buf); + readEscapedStringInto(s, buf); } -template void readEscapedStringInto>(PaddedPODArray & s, ReadBuffer & buf); -template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); +void readEscapedStringCRLF(String & s, ReadBuffer & buf) +{ + s.clear(); + readEscapedStringInto(s, buf); +} +template void readEscapedStringInto,false>(PaddedPODArray & s, ReadBuffer & buf); +template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); +template void 
readEscapedStringInto,true>(PaddedPODArray & s, ReadBuffer & buf); +template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); /** If enable_sql_style_quoting == true, * strings like 'abc''def' will be parsed as abc'def. @@ -2057,7 +2096,14 @@ bool tryReadJSONField(String & s, ReadBuffer & buf, const FormatSettings::JSON & void readTSVField(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringIntoImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } +void readTSVFieldCRLF(String & s, ReadBuffer & buf) +{ + s.clear(); + readEscapedStringIntoImpl(s, buf); +} + + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index a136eb4d155..ffba4fafb5c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include @@ -232,7 +232,7 @@ inline bool checkStringCaseInsensitive(const String & s, ReadBuffer & buf) void assertStringCaseInsensitive(const char * s, ReadBuffer & buf); inline void assertStringCaseInsensitive(const String & s, ReadBuffer & buf) { - return assertStringCaseInsensitive(s.c_str(), buf); + assertStringCaseInsensitive(s.c_str(), buf); } /** Check that next character in buf matches first character of s. @@ -329,7 +329,7 @@ inline ReturnType readBoolTextWord(bool & x, ReadBuffer & buf, bool support_uppe return ReturnType(true); } -enum class ReadIntTextCheckOverflow +enum class ReadIntTextCheckOverflow : uint8_t { DO_NOT_CHECK_OVERFLOW, CHECK_OVERFLOW, @@ -583,6 +583,8 @@ void readString(String & s, ReadBuffer & buf); void readEscapedString(String & s, ReadBuffer & buf); +void readEscapedStringCRLF(String & s, ReadBuffer & buf); + void readQuotedString(String & s, ReadBuffer & buf); void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); @@ -609,6 +611,9 @@ void readEscapedStringUntilEOL(String & s, ReadBuffer & buf); /// Only 0x20 as whitespace character void readStringUntilWhitespace(String & s, ReadBuffer & buf); +void readStringUntilAmpersand(String & s, ReadBuffer & buf); +void readStringUntilEquals(String & s, ReadBuffer & buf); + /** Read string in CSV format. 
* Parsing rules: @@ -642,7 +647,7 @@ void readStringInto(Vector & s, ReadBuffer & buf); template void readNullTerminated(Vector & s, ReadBuffer & buf); -template +template void readEscapedStringInto(Vector & s, ReadBuffer & buf); template @@ -907,7 +912,7 @@ inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) inline void readUUIDText(UUID & uuid, ReadBuffer & buf) { - return readUUIDTextImpl(uuid, buf); + readUUIDTextImpl(uuid, buf); } inline bool tryReadUUIDText(UUID & uuid, ReadBuffer & buf) @@ -929,7 +934,7 @@ inline ReturnType readIPv4TextImpl(IPv4 & ip, ReadBuffer & buf) inline void readIPv4Text(IPv4 & ip, ReadBuffer & buf) { - return readIPv4TextImpl(ip, buf); + readIPv4TextImpl(ip, buf); } inline bool tryReadIPv4Text(IPv4 & ip, ReadBuffer & buf) @@ -951,7 +956,7 @@ inline ReturnType readIPv6TextImpl(IPv6 & ip, ReadBuffer & buf) inline void readIPv6Text(IPv6 & ip, ReadBuffer & buf) { - return readIPv6TextImpl(ip, buf); + readIPv6TextImpl(ip, buf); } inline bool tryReadIPv6Text(IPv6 & ip, ReadBuffer & buf) @@ -1898,6 +1903,7 @@ void readJSONField(String & s, ReadBuffer & buf, const FormatSettings::JSON & se bool tryReadJSONField(String & s, ReadBuffer & buf, const FormatSettings::JSON & settings); void readTSVField(String & s, ReadBuffer & buf); +void readTSVFieldCRLF(String & s, ReadBuffer & buf); /** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters). * It is assumed that the cursor is located on the `\` symbol diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 6c44861eae3..e73a9054928 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -10,7 +10,7 @@ namespace DB { -enum class LocalFSReadMethod +enum class LocalFSReadMethod : uint8_t { /** * Simple synchronous reads with 'read'. 
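
The readEscapedStringCRLF / readTSVFieldCRLF entry points introduced above make "\r\n" a valid row terminator for TSV input: a '\r' ends the field only when the next byte is '\n', otherwise it is kept as ordinary data. A simplified standalone sketch of that rule over a string cursor (escape-sequence handling omitted; this is not the ClickHouse ReadBuffer API):

    #include <string>
    #include <string_view>

    // Reads one TSV field, treating '\t' and '\n' as delimiters and "\r\n" as an
    // alternative row terminator. A lone '\r' in the middle of a value is data.
    std::string readFieldCRLF(std::string_view & in)
    {
        std::string field;
        while (!in.empty())
        {
            char c = in.front();
            if (c == '\t' || c == '\n')
                break;                                  // delimiter: the caller consumes it
            if (c == '\r')
            {
                if (in.size() == 1 || in[1] == '\n')
                    break;                              // "\r\n" (or a trailing '\r') ends the field
                field.push_back('\r');                  // stray '\r' stays in the value
                in.remove_prefix(1);
                continue;
            }
            field.push_back(c);
            in.remove_prefix(1);
        }
        return field;
    }
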
@@ -54,7 +54,7 @@ enum class LocalFSReadMethod pread_fake_async }; -enum class RemoteFSReadMethod +enum class RemoteFSReadMethod : uint8_t { read, threadpool, diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index 80366510b53..fa9d018eaa6 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -202,7 +202,7 @@ static Aws::String getAWSMetadataEndpoint() if (ec2_metadata_service_endpoint.empty()) { Aws::String ec2_metadata_service_endpoint_mode = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT_MODE"); - if (ec2_metadata_service_endpoint_mode.length() == 0) + if (ec2_metadata_service_endpoint_mode.empty()) { ec2_metadata_service_endpoint = "http://169.254.169.254"; //default to IPv4 default endpoint } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index de20a712d4c..1cef43530e0 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -305,8 +305,7 @@ void PocoHTTPClient::makeRequestInternal( Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { - const auto request_configuration = per_request_configuration(); - makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); + makeRequestInternalImpl(request, response, readLimiter, writeLimiter); } String getMethod(const Aws::Http::HttpRequest & request) @@ -330,7 +329,6 @@ String getMethod(const Aws::Http::HttpRequest & request) void PocoHTTPClient::makeRequestInternalImpl( Aws::Http::HttpRequest & request, - const DB::ProxyConfiguration & proxy_configuration, std::shared_ptr & response, Aws::Utils::RateLimits::RateLimiterInterface *, Aws::Utils::RateLimits::RateLimiterInterface *) const @@ -383,6 +381,7 @@ void PocoHTTPClient::makeRequestInternalImpl( try { + const auto proxy_configuration = per_request_configuration(); for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt) { Poco::URI target_uri(uri); diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index a0b35e9b4a9..88251b964e2 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -135,7 +135,7 @@ private: Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const; - enum class S3MetricType + enum class S3MetricType : uint8_t { Microseconds, Count, @@ -146,7 +146,7 @@ private: EnumSize, }; - enum class S3MetricKind + enum class S3MetricKind : uint8_t { Read, Write, @@ -156,7 +156,6 @@ private: void makeRequestInternalImpl( Aws::Http::HttpRequest & request, - const DB::ProxyConfiguration & proxy_configuration, std::shared_ptr & response, Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const; diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index 424cf65caf2..3b03356a8fb 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -169,7 +169,7 @@ using DeleteObjectsRequest = ExtendedRequest; class ComposeObjectRequest : public ExtendedRequest { public: - inline const char * GetServiceRequestName() const override { return "ComposeObject"; } + const char * GetServiceRequestName() const override { return "ComposeObject"; } AWS_S3_API Aws::String SerializePayload() const override; diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 4e679e6c477..4bf7a3ddf86 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -1,8 +1,7 @@ #include -#include -#include "Common/Macros.h" #include 
#include +#include "Common/Macros.h" #if USE_AWS_S3 #include #include @@ -55,7 +54,11 @@ URI::URI(const std::string & uri_) static constexpr auto OSS = "OSS"; static constexpr auto EOS = "EOS"; - uri = Poco::URI(uri_); + if (containsArchive(uri_)) + std::tie(uri_str, archive_pattern) = getPathToArchiveAndArchivePattern(uri_); + else + uri_str = uri_; + uri = Poco::URI(uri_str); std::unordered_map mapper; auto context = Context::getGlobalContextInstance(); @@ -126,9 +129,10 @@ URI::URI(const std::string & uri_) boost::to_upper(name); /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", - quoteString(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", + quoteString(name)); if (name == COS) storage_name = COSN; @@ -156,10 +160,40 @@ void URI::validateBucket(const String & bucket, const Poco::URI & uri) /// S3 specification requires at least 3 and at most 63 characters in bucket name. /// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html if (bucket.length() < 3 || bucket.length() > 63) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}", - quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : ""); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}", + quoteString(bucket), + !uri.empty() ? " (" + uri.toString() + ")" : ""); } +bool URI::containsArchive(const std::string & source) +{ + size_t pos = source.find("::"); + return (pos != std::string::npos); +} + +std::pair URI::getPathToArchiveAndArchivePattern(const std::string & source) +{ + size_t pos = source.find("::"); + assert(pos != std::string::npos); + + std::string path_to_archive = source.substr(0, pos); + while ((!path_to_archive.empty()) && path_to_archive.ends_with(' ')) + path_to_archive.pop_back(); + + if (path_to_archive.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + + std::string_view path_in_archive_view = std::string_view{source}.substr(pos + 2); + while (path_in_archive_view.front() == ' ') + path_in_archive_view.remove_prefix(1); + + if (path_in_archive_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); + + return {path_to_archive, std::string{path_in_archive_view}}; +} } } diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 06b7d03aa8c..363f98c46f5 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "config.h" @@ -28,6 +29,9 @@ struct URI std::string key; std::string version_id; std::string storage_name; + /// Path (or path pattern) in archive if uri is an archive. 
+ std::optional archive_pattern; + std::string uri_str; bool is_virtual_hosted_style; @@ -36,6 +40,10 @@ struct URI void addRegionToURI(const std::string & region); static void validateBucket(const std::string & bucket, const Poco::URI & uri); + +private: + bool containsArchive(const std::string & source); + std::pair getPathToArchiveAndArchivePattern(const std::string & source); }; } diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 549d0a569c6..d3968d883e8 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -149,16 +149,18 @@ namespace dest_bucket, dest_key, /* local_path_ */ {}, /* data_size */ 0, outcome.IsSuccess() ? nullptr : &outcome.GetError()); - if (outcome.IsSuccess()) - { - multipart_upload_id = outcome.GetResult().GetUploadId(); - LOG_TRACE(log, "Multipart upload has created. Bucket: {}, Key: {}, Upload id: {}", dest_bucket, dest_key, multipart_upload_id); - } - else + if (!outcome.IsSuccess()) { ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1); throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); } + multipart_upload_id = outcome.GetResult().GetUploadId(); + if (multipart_upload_id.empty()) + { + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1); + throw Exception(ErrorCodes::S3_ERROR, "Invalid CreateMultipartUpload result: missing UploadId."); + } + LOG_TRACE(log, "Multipart upload was created. Bucket: {}, Key: {}, Upload id: {}", dest_bucket, dest_key, multipart_upload_id); } void completeMultipartUpload() @@ -316,23 +318,23 @@ namespace num_parts = (total_size + part_size - 1) / part_size; } - if (num_parts < 1 || num_parts > max_part_number || part_size < min_upload_part_size || part_size > max_upload_part_size) - { - String msg; - if (num_parts < 1) - msg = "Number of parts is zero"; - else if (num_parts > max_part_number) - msg = fmt::format("Number of parts exceeds {}", num_parts, max_part_number); - else if (part_size < min_upload_part_size) - msg = fmt::format("Size of a part is less than {}", part_size, min_upload_part_size); - else - msg = fmt::format("Size of a part exceeds {}", part_size, max_upload_part_size); + String error; + if (num_parts < 1) + error = "Number of parts is zero"; + else if (num_parts > max_part_number) + error = fmt::format("Number of parts exceeds {}/{}", num_parts, max_part_number); + else if (part_size < min_upload_part_size) + error = fmt::format("Size of a part is less than {}/{}", part_size, min_upload_part_size); + else if (part_size > max_upload_part_size) + error = fmt::format("Size of a part exceeds {}/{}", part_size, max_upload_part_size); + if (!error.empty()) + { throw Exception( ErrorCodes::INVALID_CONFIG_PARAMETER, "{} while writing {} bytes to S3. Check max_part_number = {}, " "min_upload_part_size = {}, max_upload_part_size = {}", - msg, total_size, max_part_number, min_upload_part_size, max_upload_part_size); + error, total_size, max_part_number, min_upload_part_size, max_upload_part_size); } /// We've calculated the size of a normal part (the final part can be smaller). 
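For a concrete feel of the ceiling division and the bounds checked above: copying 10 GiB with a 512 MiB part size yields exactly 20 parts, and the checks then require 1 <= num_parts <= max_part_number and min_upload_part_size <= part_size <= max_upload_part_size. The numbers below are illustrative, not defaults from the code:

#include <cstddef>

constexpr std::size_t total_size = 10ULL * 1024 * 1024 * 1024;               // 10 GiB to copy
constexpr std::size_t part_size  = 512ULL * 1024 * 1024;                     // 512 MiB per part
constexpr std::size_t num_parts  = (total_size + part_size - 1) / part_size; // ceiling division
static_assert(num_parts == 20);   // must also stay within [1, max_part_number]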
@@ -652,14 +654,25 @@ namespace const std::optional> & object_metadata_, ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, - BlobStorageLogWriterPtr blob_storage_log_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyS3File")) + BlobStorageLogWriterPtr blob_storage_log_, + std::function fallback_method_) + : UploadHelper( + client_ptr_, + dest_bucket_, + dest_key_, + request_settings_, + object_metadata_, + schedule_, + for_disk_s3_, + blob_storage_log_, + getLogger("copyS3File")) , src_bucket(src_bucket_) , src_key(src_key_) , offset(src_offset_) , size(src_size_) , supports_multipart_copy(client_ptr_->supportsMultiPartCopy()) , read_settings(read_settings_) + , fallback_method(std::move(fallback_method_)) { } @@ -682,14 +695,7 @@ namespace size_t size; bool supports_multipart_copy; const ReadSettings read_settings; - - CreateReadBuffer getSourceObjectReadBuffer() - { - return [&] - { - return std::make_unique(client_ptr, src_bucket, src_key, "", request_settings, read_settings); - }; - } + std::function fallback_method; void performSingleOperationCopy() { @@ -744,28 +750,21 @@ namespace if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest" || outcome.GetError().GetExceptionName() == "InvalidArgument" || + outcome.GetError().GetExceptionName() == "AccessDenied" || (outcome.GetError().GetExceptionName() == "InternalError" && outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT && outcome.GetError().GetMessage().contains("use the Rewrite method in the JSON API"))) { - if (!supports_multipart_copy) + if (!supports_multipart_copy || outcome.GetError().GetExceptionName() == "AccessDenied") { - LOG_INFO(log, "Multipart upload using copy is not supported, will try regular upload for Bucket: {}, Key: {}, Object size: {}", - dest_bucket, - dest_key, - size); - copyDataToS3File( - getSourceObjectReadBuffer(), - offset, - size, - client_ptr, + LOG_INFO( + log, + "Multipart upload using copy is not supported, will try regular upload for Bucket: {}, Key: {}, Object size: " + "{}", dest_bucket, dest_key, - request_settings, - blob_storage_log, - object_metadata, - schedule, - for_disk_s3); + size); + fallback_method(); break; } else @@ -859,17 +858,29 @@ void copyDataToS3File( ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { - CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3, blob_storage_log}; + CopyDataToFileHelper helper{ + create_read_buffer, + offset, + size, + dest_s3_client, + dest_bucket, + dest_key, + settings, + object_metadata, + schedule, + for_disk_s3, + blob_storage_log}; helper.performCopy(); } void copyS3File( - const std::shared_ptr & s3_client, + const std::shared_ptr & src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, size_t src_size, + std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, @@ -879,19 +890,50 @@ void copyS3File( ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { - if (settings.allow_native_copy) + if (!dest_s3_client) + dest_s3_client = src_s3_client; + + std::function fallback_method = [&] { - CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, read_settings, 
object_metadata, schedule, for_disk_s3, blob_storage_log}; - helper.performCopy(); - } - else + auto create_read_buffer + = [&] { return std::make_unique(src_s3_client, src_bucket, src_key, "", settings, read_settings); }; + + copyDataToS3File( + create_read_buffer, + src_offset, + src_size, + dest_s3_client, + dest_bucket, + dest_key, + settings, + blob_storage_log, + object_metadata, + schedule, + for_disk_s3); + }; + + if (!settings.allow_native_copy) { - auto create_read_buffer = [&] - { - return std::make_unique(s3_client, src_bucket, src_key, "", settings, read_settings); - }; - copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, blob_storage_log, object_metadata, schedule, for_disk_s3); + fallback_method(); + return; } + + CopyFileHelper helper{ + src_s3_client, + src_bucket, + src_key, + src_offset, + src_size, + dest_bucket, + dest_key, + settings, + read_settings, + object_metadata, + schedule, + for_disk_s3, + blob_storage_log, + std::move(fallback_method)}; + helper.performCopy(); } } diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index d5da4d260b1..85b3870ddbf 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -31,11 +31,12 @@ using CreateReadBuffer = std::function()>; /// /// read_settings - is used for throttling in case of native copy is not possible void copyS3File( - const std::shared_ptr & s3_client, + const std::shared_ptr & src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, size_t src_size, + std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index eee3da9fb74..78efda4ae57 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -53,7 +53,7 @@ namespace const auto & result = outcome.GetResult(); ObjectInfo object_info; object_info.size = static_cast(result.GetContentLength()); - object_info.last_modification_time = result.GetLastModified().Millis() / 1000; + object_info.last_modification_time = result.GetLastModified().Seconds(); if (with_metadata) object_info.metadata = result.GetMetadata(); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 56e3e0df21b..78c51fcb29c 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include "config.h" @@ -174,8 +174,11 @@ void AuthSettings::updateFrom(const AuthSettings & from) if (!from.session_token.empty()) session_token = from.session_token; - headers = from.headers; - region = from.region; + if (!from.headers.empty()) + headers = from.headers; + if (!from.region.empty()) + region = from.region; + server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; server_side_encryption_kms_config = from.server_side_encryption_kms_config; diff --git a/src/IO/SharedThreadPools.cpp b/src/IO/SharedThreadPools.cpp index 2ea30400ad9..3606ddd984c 100644 --- a/src/IO/SharedThreadPools.cpp +++ b/src/IO/SharedThreadPools.cpp @@ -20,6 +20,9 @@ namespace CurrentMetrics extern const Metric MergeTreeOutdatedPartsLoaderThreads; extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive; extern const Metric MergeTreeOutdatedPartsLoaderThreadsScheduled; + extern const Metric MergeTreeUnexpectedPartsLoaderThreads; + extern const Metric MergeTreeUnexpectedPartsLoaderThreadsActive; + extern const Metric MergeTreeUnexpectedPartsLoaderThreadsScheduled; extern 
const Metric DatabaseReplicatedCreateTablesThreads; extern const Metric DatabaseReplicatedCreateTablesThreadsActive; extern const Metric DatabaseReplicatedCreateTablesThreadsScheduled; @@ -151,6 +154,12 @@ StaticThreadPool & getOutdatedPartsLoadingThreadPool() return instance; } +StaticThreadPool & getUnexpectedPartsLoadingThreadPool() +{ + static StaticThreadPool instance("MergeTreeUnexpectedPartsLoaderThreadPool", CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreads, CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreadsActive, CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreadsScheduled); + return instance; +} + StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool() { static StaticThreadPool instance("CreateTablesThreadPool", CurrentMetrics::DatabaseReplicatedCreateTablesThreads, CurrentMetrics::DatabaseReplicatedCreateTablesThreadsActive, CurrentMetrics::DatabaseReplicatedCreateTablesThreadsScheduled); diff --git a/src/IO/SharedThreadPools.h b/src/IO/SharedThreadPools.h index acc5368f8ac..50adc70c9a0 100644 --- a/src/IO/SharedThreadPools.h +++ b/src/IO/SharedThreadPools.h @@ -64,6 +64,8 @@ StaticThreadPool & getPartsCleaningThreadPool(); /// the number of threads by calling enableTurboMode() :-) StaticThreadPool & getOutdatedPartsLoadingThreadPool(); +StaticThreadPool & getUnexpectedPartsLoadingThreadPool(); + /// ThreadPool used for creating tables in DatabaseReplicated. StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool(); diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 6dce8008170..2c41ae0a9c5 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -109,8 +109,9 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { if (istr.buffer().end() - istr.position() >= 10) - return varint_impl::readVarUInt(x, istr); - return varint_impl::readVarUInt(x, istr); + varint_impl::readVarUInt(x, istr); + else + varint_impl::readVarUInt(x, istr); } inline void readVarUInt(UInt64 & x, std::istream & istr) diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 1ceb938e454..ef4e0058ec3 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -41,7 +41,7 @@ public: * If direct write is performed into [position(), buffer().end()) and its length is not enough, * you need to fill it first (i.g with write call), after it the capacity is regained. */ - inline void next() + void next() { if (!offset()) return; @@ -69,7 +69,7 @@ public: /// Calling finalize() in the destructor of derived classes is a bad practice. 
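The comment above points at a general C++ pitfall worth spelling out: once a base-class destructor is running, the derived part of the object is already destroyed, so a finalize() that relies on virtual dispatch (and a flush that may throw) cannot safely live in destructors. A stand-alone illustration, deliberately unrelated to the actual WriteBuffer hierarchy:

#include <iostream>

struct Base
{
    virtual ~Base() { finalize(); }              // runs after the Derived part has been destroyed
    void finalize() { nextImpl(); }              // a throw here during unwinding would terminate
    virtual void nextImpl() { std::cout << "Base flush\n"; }
};

struct Derived : Base
{
    void nextImpl() override { std::cout << "Derived flush\n"; }  // never reached from ~Base()
};

int main()
{
    Derived d;
}   // prints "Base flush": the derived override is not called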
virtual ~WriteBuffer(); - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); @@ -96,7 +96,7 @@ public: } } - inline void write(char x) + void write(char x) { if (finalized) throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"}; diff --git a/src/IO/WriteBufferFromPocoSocket.h b/src/IO/WriteBufferFromPocoSocket.h index 9c5509aebd1..1f69dfc466c 100644 --- a/src/IO/WriteBufferFromPocoSocket.h +++ b/src/IO/WriteBufferFromPocoSocket.h @@ -37,11 +37,11 @@ protected: void socketSendBytes(const char * ptr, size_t size); void socketSendStr(const std::string & str) { - return socketSendBytes(str.data(), str.size()); + socketSendBytes(str.data(), str.size()); } void socketSendStr(const char * ptr) { - return socketSendBytes(ptr, strlen(ptr)); + socketSendBytes(ptr, strlen(ptr)); } Poco::Net::Socket & socket; diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 3ea372f75d8..b796c029051 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -339,7 +339,10 @@ void WriteBufferFromS3::allocateBuffer() chassert(0 == hidden_size); if (buffer_allocation_policy->getBufferNumber() == 1) - return allocateFirstBuffer(); + { + allocateFirstBuffer(); + return; + } memory = Memory(buffer_allocation_policy->getBufferSize()); WriteBuffer::set(memory.data(), memory.size()); @@ -410,7 +413,13 @@ void WriteBufferFromS3::createMultipartUpload() multipart_upload_id = outcome.GetResult().GetUploadId(); - LOG_TRACE(limitedLog, "Multipart upload has created. {}", getShortLogDetails()); + if (multipart_upload_id.empty()) + { + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1); + throw Exception(ErrorCodes::S3_ERROR, "Invalid CreateMultipartUpload result: missing UploadId."); + } + + LOG_TRACE(limitedLog, "Multipart upload was created. {}", getShortLogDetails()); } void WriteBufferFromS3::abortMultipartUpload() diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 1df559b252c..fbfec3588fa 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -12,13 +12,13 @@ #include #include #include -#include #include #include #include #include + namespace DB { /** diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index a2ecc34f1ab..1ea32af2968 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -71,7 +71,7 @@ private: { vector.resize( ((position() - reinterpret_cast(vector.data())) /// NOLINT - + sizeof(ValueType) - 1) /// Align up. + + sizeof(ValueType) - 1) /// Align up. /// NOLINT / sizeof(ValueType)); /// Prevent further writes. 
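The resize above is the usual align-up idiom: round the number of written bytes up to a whole number of ValueType elements so the tail bytes are not cut off. A one-line check with a 4-byte element type:

#include <cstdint>
static_assert((10 + sizeof(std::uint32_t) - 1) / sizeof(std::uint32_t) == 3);   // 10 bytes -> 3 four-byte elements, not 2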
diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index 88c706a590f..622f93719ac 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -102,17 +102,17 @@ static inline void writeProbablyQuotedStringImpl(StringRef s, WriteBuffer & buf, void writeProbablyBackQuotedString(StringRef s, WriteBuffer & buf) { - writeProbablyQuotedStringImpl(s, buf, [](StringRef s_, WriteBuffer & buf_) { return writeBackQuotedString(s_, buf_); }); + writeProbablyQuotedStringImpl(s, buf, [](StringRef s_, WriteBuffer & buf_) { writeBackQuotedString(s_, buf_); }); } void writeProbablyDoubleQuotedString(StringRef s, WriteBuffer & buf) { - writeProbablyQuotedStringImpl(s, buf, [](StringRef s_, WriteBuffer & buf_) { return writeDoubleQuotedString(s_, buf_); }); + writeProbablyQuotedStringImpl(s, buf, [](StringRef s_, WriteBuffer & buf_) { writeDoubleQuotedString(s_, buf_); }); } void writeProbablyBackQuotedStringMySQL(StringRef s, WriteBuffer & buf) { - writeProbablyQuotedStringImpl(s, buf, [](StringRef s_, WriteBuffer & buf_) { return writeBackQuotedStringMySQL(s_, buf_); }); + writeProbablyQuotedStringImpl(s, buf, [](StringRef s_, WriteBuffer & buf_) { writeBackQuotedStringMySQL(s_, buf_); }); } void writePointerHex(const void * ptr, WriteBuffer & buf) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index a30e2feb439..d4b2d8ea0dc 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.h b/src/IO/ZstdDeflatingAppendableWriteBuffer.h index d9c4f32d6da..34cdf03df25 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.h +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.h @@ -27,7 +27,7 @@ class ZstdDeflatingAppendableWriteBuffer : public BufferWithOwnMemory; /// Frame end block. If we read non-empty file and see no such flag we should add it. 
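The three-byte terminator defined just below is, per the zstd frame format (RFC 8878), a block header read as a 24-bit little-endian value: bit 0 is the last-block flag, bits 1-2 the block type, bits 3-23 the block size, so {0x01, 0x00, 0x00} encodes an empty raw block marked as last. A small decoding sketch of that layout (the struct and function names are invented for illustration):

#include <cstdint>

struct ZstdBlockHeader { bool last_block; std::uint8_t block_type; std::uint32_t block_size; };

constexpr ZstdBlockHeader decodeBlockHeader(std::uint8_t b0, std::uint8_t b1, std::uint8_t b2)
{
    std::uint32_t raw = static_cast<std::uint32_t>(b0)
        | (static_cast<std::uint32_t>(b1) << 8)
        | (static_cast<std::uint32_t>(b2) << 16);
    return {static_cast<bool>(raw & 0x1),                 // bit 0: Last_Block
            static_cast<std::uint8_t>((raw >> 1) & 0x3),  // bits 1-2: Block_Type (0 = Raw)
            raw >> 3};                                     // bits 3-23: Block_Size
}

static_assert(decodeBlockHeader(0x01, 0x00, 0x00).last_block);        // the last-block flag is set
static_assert(decodeBlockHeader(0x01, 0x00, 0x00).block_size == 0);   // ... and the block is empty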
- static inline constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; + static constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; ZstdDeflatingAppendableWriteBuffer( std::unique_ptr out_, diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp index c499542fedb..91139ad94eb 100644 --- a/src/IO/examples/read_buffer_from_hdfs.cpp +++ b/src/IO/examples/read_buffer_from_hdfs.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/IO/examples/read_write_int.cpp b/src/IO/examples/read_write_int.cpp index 49374878790..ef2155731bf 100644 --- a/src/IO/examples/read_write_int.cpp +++ b/src/IO/examples/read_write_int.cpp @@ -17,7 +17,7 @@ int main(int, char **) Int64 x2 = 0; std::string s; - std::cerr << static_cast(x1) << std::endl; + std::cerr << x1 << std::endl; { DB::WriteBufferFromString wb(s); @@ -31,7 +31,7 @@ int main(int, char **) DB::readIntText(x2, rb); } - std::cerr << static_cast(x2) << std::endl; + std::cerr << x2 << std::endl; } catch (const DB::Exception & e) { diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 83928b32f2f..e046e837689 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -712,12 +712,12 @@ bool tryParseDateTimeBestEffortUS(time_t & res, ReadBuffer & in, const DateLUTIm void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { - return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); + parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); } void parseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { - return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); + parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); } bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index d1652784cc2..3a21d7201a9 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 447b72ed7c6..4a4d7cc0fc2 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -547,7 +547,7 @@ public: std::unique_ptr getWriteBuffer(String file_name = "file") { S3Settings::RequestSettings request_settings; - request_settings.updateFromSettings(settings); + request_settings.updateFromSettingsIfChanged(settings); client->resetCounters(); diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index ed06b1d0fc6..a137404a669 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -255,7 +255,7 @@ namespace if (roles_to_revoke.all) boost::range::set_difference(all_granted_roles_set, roles_to_revoke.except_ids, std::back_inserter(roles_to_revoke_ids)); else 
- boost::range::remove_erase_if(roles_to_revoke_ids, [&](const UUID & id) { return !all_granted_roles_set.count(id); }); + std::erase_if(roles_to_revoke_ids, [&](const UUID & id) { return !all_granted_roles_set.count(id); }); roles_to_revoke = roles_to_revoke_ids; current_user_access.checkAdminOption(roles_to_revoke_ids); diff --git a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp index 76979ed86c8..71fc1047cfa 100644 --- a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 1147d74c146..96d8e55a74c 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 30eb908330b..cfccc835d29 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -21,6 +21,9 @@ #include #include +#include +#include + namespace DB { @@ -589,7 +592,7 @@ void ActionsDAG::removeUnusedActions(const std::unordered_set & us } } - nodes.remove_if([&](const Node & node) { return !visited_nodes.contains(&node); }); + std::erase_if(nodes, [&](const Node & node) { return !visited_nodes.contains(&node); }); std::erase_if(inputs, [&](const Node * node) { return !visited_nodes.contains(node); }); } @@ -708,16 +711,18 @@ static ColumnWithTypeAndName executeActionForPartialResult(const ActionsDAG::Nod return res_column; } -Block ActionsDAG::updateHeader(Block header) const +Block ActionsDAG::updateHeader(const Block & header) const { IntermediateExecutionResult node_to_column; std::set pos_to_remove; { - std::unordered_map> input_positions; + using inline_vector = absl::InlinedVector; // 64B, holding max 7 size_t elements inlined + absl::flat_hash_map input_positions; - for (size_t pos = 0; pos < inputs.size(); ++pos) - input_positions[inputs[pos]->result_name].emplace_back(pos); + /// We insert from last to first in the inlinedVector so it's easier to pop_back matches later + for (size_t pos = inputs.size(); pos != 0; pos--) + input_positions[inputs[pos - 1]->result_name].emplace_back(pos - 1); for (size_t pos = 0; pos < header.columns(); ++pos) { @@ -725,10 +730,11 @@ Block ActionsDAG::updateHeader(Block header) const auto it = input_positions.find(col.name); if (it != input_positions.end() && !it->second.empty()) { - auto & list = it->second; pos_to_remove.insert(pos); - node_to_column[inputs[list.front()]] = col; - list.pop_front(); + + auto & v = it->second; + node_to_column[inputs[v.back()]] = col; + v.pop_back(); } } } @@ -746,18 +752,21 @@ Block ActionsDAG::updateHeader(Block header) const throw; } - if (isInputProjected()) - header.clear(); - else - header.erase(pos_to_remove); Block res; - + res.reserve(result_columns.size()); for (auto & col : result_columns) res.insert(std::move(col)); - for (auto && item : header) - res.insert(std::move(item)); + if (isInputProjected()) + return res; + + res.reserve(header.columns() - pos_to_remove.size()); + for (size_t i = 0; i < header.columns(); i++) + { + if (!pos_to_remove.contains(i)) + 
res.insert(header.data[i]); + } return res; } @@ -1612,7 +1621,7 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) first.projected_output = second.projected_output; } -void ActionsDAG::mergeNodes(ActionsDAG && second) +void ActionsDAG::mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs) { std::unordered_map node_name_to_node; for (auto & node : nodes) @@ -1668,6 +1677,12 @@ void ActionsDAG::mergeNodes(ActionsDAG && second) nodes_to_process.pop_back(); } + if (out_outputs) + { + for (auto & node : second.getOutputs()) + out_outputs->push_back(node_name_to_node.at(node->result_name)); + } + if (nodes_to_move_from_second_dag.empty()) return; @@ -2879,6 +2894,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( FunctionOverloadResolverPtr function_overload_resolver; + String result_name; if (node->function_base->getName() == "indexHint") { ActionsDAG::NodeRawConstPtrs children; @@ -2899,6 +2915,11 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( auto index_hint_function_clone = std::make_shared(); index_hint_function_clone->setActions(std::move(index_hint_filter_dag)); function_overload_resolver = std::make_shared(std::move(index_hint_function_clone)); + /// Keep the unique name like "indexHint(foo)" instead of replacing it + /// with "indexHint()". Otherwise index analysis (which does look at + /// indexHint arguments that we're hiding here) will get confused by the + /// multiple substantially different nodes with the same result name. + result_name = node->result_name; } } } @@ -2913,7 +2934,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( function_base, std::move(function_children), std::move(arguments), - {}, + result_name, node->result_type, all_const); break; diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 8c0bcf8fdc0..8c0e3f0e576 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -51,7 +51,7 @@ class ActionsDAG { public: - enum class ActionType + enum class ActionType : uint8_t { /// Column which must be in input. INPUT, @@ -272,7 +272,7 @@ public: /// /// In addition, check that result constants are constants according to DAG. /// In case if function return constant, but arguments are not constant, materialize it. - Block updateHeader(Block header) const; + Block updateHeader(const Block & header) const; using IntermediateExecutionResult = std::unordered_map; static ColumnsWithTypeAndName evaluatePartialResult( @@ -288,7 +288,7 @@ public: /// Apply materialize() function to node. Result node has the same name. const Node & materializeNode(const Node & node); - enum class MatchColumnsMode + enum class MatchColumnsMode : uint8_t { /// Require same number of columns in source and result. Match columns by corresponding positions, regardless to names. Position, @@ -324,8 +324,9 @@ public: /// So that pointers to nodes are kept valid. void mergeInplace(ActionsDAG && second); - /// Merge current nodes with specified dag nodes - void mergeNodes(ActionsDAG && second); + /// Merge current nodes with specified dag nodes. + /// *out_outputs is filled with pointers to the nodes corresponding to second.getOutputs(). 
+ void mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs = nullptr); struct SplitResult { diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 504b7257563..0bdd4c089f1 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1323,7 +1323,9 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, Data & data) { DataTypePtr type; - if (data.getContext()->getSettingsRef().allow_experimental_variant_type && data.getContext()->getSettingsRef().use_variant_as_common_type) + if (literal.custom_type) + type = literal.custom_type; + else if (data.getContext()->getSettingsRef().allow_experimental_variant_type && data.getContext()->getSettingsRef().use_variant_as_common_type) type = applyVisitor(FieldToDataType(), literal.value); else type = applyVisitor(FieldToDataType(), literal.value); diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 643bf50ecd4..046c7387ee8 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -78,7 +78,7 @@ class ASTIdentifier; class ASTFunction; class ASTLiteral; -enum class GroupByKind +enum class GroupByKind : uint8_t { NONE, ORDINARY, diff --git a/src/Interpreters/AggregatedDataVariants.cpp b/src/Interpreters/AggregatedDataVariants.cpp index 87cfdda5948..8f82f15248f 100644 --- a/src/Interpreters/AggregatedDataVariants.cpp +++ b/src/Interpreters/AggregatedDataVariants.cpp @@ -117,8 +117,6 @@ size_t AggregatedDataVariants::size() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t AggregatedDataVariants::sizeWithoutOverflowRow() const @@ -136,8 +134,6 @@ size_t AggregatedDataVariants::sizeWithoutOverflowRow() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } const char * AggregatedDataVariants::getMethodName() const @@ -155,8 +151,6 @@ const char * AggregatedDataVariants::getMethodName() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } bool AggregatedDataVariants::isTwoLevel() const @@ -174,8 +168,6 @@ bool AggregatedDataVariants::isTwoLevel() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } bool AggregatedDataVariants::isConvertibleToTwoLevel() const diff --git a/src/Interpreters/AggregatedDataVariants.h b/src/Interpreters/AggregatedDataVariants.h index 9f7185db9fc..99c136735e5 100644 --- a/src/Interpreters/AggregatedDataVariants.h +++ b/src/Interpreters/AggregatedDataVariants.h @@ -288,7 +288,7 @@ struct AggregatedDataVariants : private boost::noncopyable M(low_cardinality_key_string_two_level) \ M(low_cardinality_key_fixed_string_two_level) - enum class Type + enum class Type : uint8_t { EMPTY = 0, without_key, diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 2db07bb77f6..45b43ae2d3a 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1803,10 +1803,8 @@ void Aggregator::writeToTemporaryFileImpl( size_t block_size_rows = block.rows(); size_t block_size_bytes = block.bytes(); - if (block_size_rows > max_temporary_block_size_rows) - max_temporary_block_size_rows = block_size_rows; - if (block_size_bytes > max_temporary_block_size_bytes) - max_temporary_block_size_bytes = block_size_bytes; + max_temporary_block_size_rows = std::max(block_size_rows, max_temporary_block_size_rows); + max_temporary_block_size_bytes = std::max(block_size_bytes, max_temporary_block_size_bytes); }; for (UInt32 bucket = 0; bucket < 
Method::Data::NUM_BUCKETS; ++bucket) @@ -3160,7 +3158,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari result.aggregates_pools.push_back(std::make_shared()); Arena * aggregates_pool = result.aggregates_pools.back().get(); - auto task = [group = CurrentThread::getGroup(), bucket, &merge_bucket, aggregates_pool]{ return merge_bucket(bucket, aggregates_pool, group); }; + auto task = [group = CurrentThread::getGroup(), bucket, &merge_bucket, aggregates_pool]{ merge_bucket(bucket, aggregates_pool, group); }; if (thread_pool) thread_pool->scheduleOrThrowOnError(task); diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 0bad3f7ed16..d72f3d81549 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -667,11 +667,7 @@ namespace using TimePoint = std::chrono::time_point; void appendElementsToLogSafe( - AsynchronousInsertLog & log, - std::vector elements, - TimePoint flush_time, - const String & flush_query_id, - const String & flush_exception) + AsynchronousInsertLog & log, std::vector elements, TimePoint flush_time, const String & flush_exception) try { using Status = AsynchronousInsertLogElement::Status; @@ -680,7 +676,6 @@ try { elem.flush_time = timeInSeconds(flush_time); elem.flush_time_microseconds = timeInMicroseconds(flush_time); - elem.flush_query_id = flush_query_id; elem.exception = flush_exception; elem.status = flush_exception.empty() ? Status::Ok : Status::FlushError; log.add(std::move(elem)); @@ -700,7 +695,6 @@ String serializeQuery(const IAST & query, size_t max_length) } -// static void AsynchronousInsertQueue::processData( InsertQuery key, InsertDataPtr data, ContextPtr global_context, QueueShardFlushTimeHistory & queue_shard_flush_time_history) try @@ -710,6 +704,8 @@ try SCOPE_EXIT(CurrentMetrics::sub(CurrentMetrics::PendingAsyncInsert, data->entries.size())); + setThreadName("AsyncInsertQ"); + const auto log = getLogger("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); @@ -808,12 +804,12 @@ try throw; } - auto add_entry_to_log = [&](const auto & entry, - const auto & entry_query_for_logging, - const auto & exception, - size_t num_rows, - size_t num_bytes, - Milliseconds timeout_ms) + auto add_entry_to_asynchronous_insert_log = [&](const auto & entry, + const auto & entry_query_for_logging, + const auto & exception, + size_t num_rows, + size_t num_bytes, + Milliseconds timeout_ms) { if (!async_insert_log) return; @@ -831,6 +827,7 @@ try elem.exception = exception; elem.data_kind = entry->chunk.getDataKind(); elem.timeout_milliseconds = timeout_ms.count(); + elem.flush_query_id = insert_query_id; /// If there was a parsing error, /// the entry won't be flushed anyway, @@ -857,7 +854,7 @@ try if (!log_elements.empty()) { auto flush_time = std::chrono::system_clock::now(); - appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, ""); + appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, ""); } }; @@ -865,15 +862,27 @@ try auto header = pipeline.getHeader(); if (key.data_kind == DataKind::Parsed) - chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_log); + chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log); else - chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_log); + chunk = 
processPreprocessedEntries(key, data, header, insert_context, add_entry_to_asynchronous_insert_log); ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows()); + auto log_and_add_finish_to_query_log = [&](size_t num_rows, size_t num_bytes) + { + LOG_DEBUG(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str); + queue_shard_flush_time_history.updateWithCurrentTime(); + + bool pulling_pipeline = false; + logQueryFinish( + query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal); + }; + + if (chunk.getNumRows() == 0) { finish_entries(); + log_and_add_finish_to_query_log(0, 0); return; } @@ -888,12 +897,7 @@ try CompletedPipelineExecutor completed_executor(pipeline); completed_executor.execute(); - LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str); - - queue_shard_flush_time_history.updateWithCurrentTime(); - - bool pulling_pipeline = false; - logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal); + log_and_add_finish_to_query_log(num_rows, num_bytes); } catch (...) { @@ -903,7 +907,7 @@ try { auto exception = getCurrentExceptionMessage(false); auto flush_time = std::chrono::system_clock::now(); - appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, exception); + appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, exception); } throw; } diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index a8ee064877d..da14b43d276 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -48,7 +48,7 @@ public: Block insert_block{}; }; - enum class DataKind + enum class DataKind : uint8_t { Parsed = 0, Preprocessed = 1, diff --git a/src/Interpreters/BloomFilterHash.h b/src/Interpreters/BloomFilterHash.h index 45098ecff99..8248e9e4469 100644 --- a/src/Interpreters/BloomFilterHash.h +++ b/src/Interpreters/BloomFilterHash.h @@ -108,7 +108,7 @@ struct BloomFilterHash { const auto * array_col = typeid_cast(column.get()); - if (checkAndGetColumn(array_col->getData())) + if (checkAndGetColumn(&array_col->getData())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName()); const auto & offsets = array_col->getOffsets(); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 1ded737941c..0d33e39ffa3 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1074,7 +1074,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc() void FileCache::iterate(IterateFunc && func, const UserID & user_id) { - return metadata.iterate([&](const LockedKey & locked_key) + metadata.iterate([&](const LockedKey & locked_key) { for (const auto & file_segment_metadata : locked_key) func(FileSegment::getInfo(file_segment_metadata.second->file_segment)); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 20a3af60c8c..61a356fa3c3 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -340,7 +340,7 @@ void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) remote_file_reader = remote_file_reader_; } -void FileSegment::write(const char * from, size_t size, size_t offset) +void FileSegment::write(char * 
from, size_t size, size_t offset) { ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentWriteMicroseconds); @@ -389,16 +389,20 @@ void FileSegment::write(const char * from, size_t size, size_t offset) try { - if (!cache_writer) - cache_writer = std::make_unique(file_segment_path); - #ifdef ABORT_ON_LOGICAL_ERROR /// This mutex is only needed to have a valid assertion in assertCacheCorrectness(), /// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR). std::lock_guard lock(write_mutex); #endif - cache_writer->write(from, size); + if (!cache_writer) + cache_writer = std::make_unique(file_segment_path, /* buf_size */0); + + /// Size is equal to offset as offset for write buffer points to data end. + cache_writer->set(from, size, /* offset */size); + /// Reset the buffer when finished. + SCOPE_EXIT({ cache_writer->set(nullptr, 0); }); + /// Flush the buffer. cache_writer->next(); downloaded_size += size; @@ -795,7 +799,6 @@ String FileSegment::stateToString(FileSegment::State state) case FileSegment::State::DETACHED: return "DETACHED"; } - UNREACHABLE(); } bool FileSegment::assertCorrectness() const diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 7793c50d2d5..f28482a1ce4 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -204,7 +204,7 @@ public: bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. - void write(const char * from, size_t size, size_t offset); + void write(char * from, size_t size, size_t offset); // Invariant: if state() != DOWNLOADING and remote file reader is present, the reader's // available() == 0, and getFileOffsetOfBufferEnd() == our getCurrentWriteOffset(). 
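The rewritten FileSegment::write() above saves one copy: instead of cache_writer->write(from, size), which would memcpy the data into the writer's own buffer, it points the writer's working buffer directly at the caller's memory, marks it as already filled by passing offset == size, and lets next() flush straight to the file; this borrowing is also why the from parameter is no longer const. A toy class showing the same set()/next() sequence (FileFlushBuffer is a made-up stand-in, not DB::WriteBuffer):

#include <cstddef>
#include <cstdio>

class FileFlushBuffer
{
public:
    explicit FileFlushBuffer(std::FILE * f) : file(f) {}

    /// Borrow caller-owned memory; `offset` says how much of it is already filled.
    void set(char * data, std::size_t size, std::size_t offset = 0)
    {
        begin = data;
        end = data + size;
        pos = data + offset;
    }

    /// Flush everything between begin and pos.
    void next()
    {
        if (begin && pos != begin)
            std::fwrite(begin, 1, static_cast<std::size_t>(pos - begin), file);
        pos = begin;
    }

private:
    std::FILE * file;
    char * begin = nullptr;
    char * end = nullptr;
    char * pos = nullptr;
};

int main()
{
    char data[] = "cached bytes";
    FileFlushBuffer buf(stdout);
    buf.set(data, sizeof(data) - 1, /* offset = */ sizeof(data) - 1);  // data counts as already written
    buf.next();                                                        // flushed straight from the caller's memory
    buf.set(nullptr, 0);                                               // detach before `data` goes out of scope
}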
diff --git a/src/Interpreters/Cache/FileSegmentInfo.h b/src/Interpreters/Cache/FileSegmentInfo.h index 3695be55865..1bd02f0e6ab 100644 --- a/src/Interpreters/Cache/FileSegmentInfo.h +++ b/src/Interpreters/Cache/FileSegmentInfo.h @@ -4,7 +4,7 @@ namespace DB { - enum class FileSegmentState + enum class FileSegmentState : uint8_t { DOWNLOADED, /** @@ -38,7 +38,7 @@ namespace DB DETACHED, }; - enum class FileSegmentKind + enum class FileSegmentKind : uint8_t { /** * `Regular` file segment is still in cache after usage, and can be evicted @@ -53,7 +53,7 @@ namespace DB Temporary, }; - enum class FileCacheQueueEntryType + enum class FileCacheQueueEntryType : uint8_t { None, LRU, diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index bb7b220c1f9..5d8eb9dd54a 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index d31a3fb0f10..e0691cade43 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -6,6 +6,7 @@ #include #include "Interpreters/Cache/Guards.h" + namespace DB { @@ -80,7 +81,7 @@ public: }; PriorityDumpPtr dump(const CachePriorityGuard::Lock &) override; - void pop(const CachePriorityGuard::Lock & lock) { remove(queue.begin(), lock); } + void pop(const CachePriorityGuard::Lock & lock) { remove(queue.begin(), lock); } // NOLINT bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) override; @@ -108,7 +109,7 @@ private: LRUQueue::iterator remove(LRUQueue::iterator it, const CachePriorityGuard::Lock &); - enum class IterationResult + enum class IterationResult : uint8_t { BREAK, CONTINUE, diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index c832473c4cd..5ed4ccdbeca 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -846,7 +846,7 @@ LockedKey::~LockedKey() /// See comment near cleanupThreadFunc() for more details. 
key_metadata->key_state = KeyMetadata::KeyState::REMOVING; - LOG_DEBUG(key_metadata->logger(), "Submitting key {} for removal", getKey()); + LOG_TRACE(key_metadata->logger(), "Submitting key {} for removal", getKey()); key_metadata->addToCleanupQueue(); } diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 31651149998..a5c8f3c0cf4 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -99,7 +99,7 @@ struct KeyMetadata : private std::map, const CacheMetadata * cache_metadata_, bool created_base_directory_ = false); - enum class KeyState + enum class KeyState : uint8_t { ACTIVE, REMOVING, @@ -178,7 +178,7 @@ public: void iterate(IterateFunc && func, const UserID & user_id); - enum class KeyNotFoundPolicy + enum class KeyNotFoundPolicy : uint8_t { THROW, THROW_LOGICAL, diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index fafe50c170f..a3fe8c2e779 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -126,6 +126,11 @@ bool astContainsSystemTables(ASTPtr ast, ContextPtr context) namespace { +bool isQueryCacheRelatedSetting(const String & setting_name) +{ + return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"); +} + class RemoveQueryCacheSettingsMatcher { public: @@ -141,7 +146,7 @@ public: auto is_query_cache_related_setting = [](const auto & change) { - return change.name.starts_with("query_cache_") || change.name.ends_with("_query_cache"); + return isQueryCacheRelatedSetting(change.name); }; std::erase_if(set_clause->changes, is_query_cache_related_setting); @@ -177,6 +182,40 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast) return transformed_ast; } +IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database, const Settings & settings) +{ + ast = removeQueryCacheSettings(ast); + + /// Hash the AST, we must consider aliases (issue #56258) + SipHash hash; + ast->updateTreeHash(hash, /*ignore_aliases=*/ false); + + /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different + /// tables (issue #64136) + hash.update(current_database); + + /// Finally, hash the (changed) settings as they might affect the query result (e.g. think of settings `additional_table_filters` and `limit`). + /// Note: allChanged() returns the settings in random order. Also, update()-s of the composite hash must be done in deterministic order. + /// Therefore, collect and sort the settings first, then hash them. 
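A short aside on the sorting: a composite hash is order-sensitive, so feeding the same (name, value) pairs in two different orders produces two different keys, and sorting by name first removes that nondeterminism. A toy order-sensitive chain (illustrative only; the real code feeds SipHash):

#include <cstddef>
#include <functional>
#include <string>
#include <utility>
#include <vector>

std::size_t chainHash(const std::vector<std::pair<std::string, std::string>> & settings)
{
    std::size_t h = 0;
    for (const auto & [name, value] : settings)
    {
        h = h * 31 + std::hash<std::string>{}(name);    // the order of these updates changes the result,
        h = h * 31 + std::hash<std::string>{}(value);   // hence the sort by setting name beforehand
    }
    return h;
}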
+ Settings::Range changed_settings = settings.allChanged(); + std::vector> changed_settings_sorted; /// (name, value) + for (const auto & setting : changed_settings) + { + const String & name = setting.getName(); + const String & value = setting.getValueString(); + if (!isQueryCacheRelatedSetting(name)) /// see removeQueryCacheSettings() why this is a good idea + changed_settings_sorted.push_back({name, value}); + } + std::sort(changed_settings_sorted.begin(), changed_settings_sorted.end(), [](auto & lhs, auto & rhs) { return lhs.first < rhs.first; }); + for (const auto & setting : changed_settings_sorted) + { + hash.update(setting.first); + hash.update(setting.second); + } + + return getSipHash128AsPair(hash); +} + String queryStringFromAST(ASTPtr ast) { WriteBufferFromOwnString buf; @@ -186,17 +225,16 @@ String queryStringFromAST(ASTPtr ast) } -/// Hashing of ASTs must consider aliases (issue #56258) -static constexpr bool ignore_aliases = false; - QueryCache::Key::Key( ASTPtr ast_, + const String & current_database, + const Settings & settings, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, bool is_compressed_) - : ast_hash(removeQueryCacheSettings(ast_)->getTreeHash(ignore_aliases)) + : ast_hash(calculateAstHash(ast_, current_database, settings)) , header(header_) , user_id(user_id_) , current_user_roles(current_user_roles_) @@ -207,8 +245,8 @@ QueryCache::Key::Key( { } -QueryCache::Key::Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name +QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c1c6c394698..461197cac32 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -14,6 +14,8 @@ namespace DB { +struct Settings; + /// Does AST contain non-deterministic functions like rand() and now()? bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); @@ -30,7 +32,7 @@ bool astContainsSystemTables(ASTPtr ast, ContextPtr context); class QueryCache { public: - enum class Usage + enum class Usage : uint8_t { Unknown, /// we don't know what what happened None, /// query result neither written nor read into/from query cache @@ -88,6 +90,8 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, + const String & current_database, + const Settings & settings, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -95,7 +99,7 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). 
- Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; @@ -144,7 +148,7 @@ public: Writer(const Writer & other); - enum class ChunkType {Result, Totals, Extremes}; + enum class ChunkType : uint8_t {Result, Totals, Extremes}; void buffer(Chunk && chunk, ChunkType chunk_type); void finalizeWrite(); diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 2ac38aeeca7..a593ebfdab2 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -110,14 +110,11 @@ void WriteBufferToFileSegment::nextImpl() std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() { + /** Finalize here and we don't need to finalize in the destructor, + * because in case destructor called without `getReadBufferImpl` called, data won't be read. + */ finalize(); return std::make_unique(file_segment->getPath()); } -WriteBufferToFileSegment::~WriteBufferToFileSegment() -{ - /// To be sure that file exists before destructor of segment_holder is called - WriteBufferFromFileDecorator::finalize(); -} - } diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index 822488ceb48..c4b0491f8c0 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -16,7 +16,6 @@ public: explicit WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder); void nextImpl() override; - ~WriteBufferToFileSegment() override; private: diff --git a/src/Interpreters/CancellationCode.h b/src/Interpreters/CancellationCode.h index 69f77e012f3..e37a7f13105 100644 --- a/src/Interpreters/CancellationCode.h +++ b/src/Interpreters/CancellationCode.h @@ -4,13 +4,13 @@ namespace DB { /// A result code for the KILL QUERY/KILL MUTATION statement. -enum class CancellationCode +enum class CancellationCode : uint8_t { NotFound = 0, /// already cancelled QueryIsNotInitializedYet = 1, CancelCannotBeSent = 2, CancelSent = 3, - Unknown + Unknown = 255 }; } diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index c2ed9f7ffa4..3054667e264 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -130,6 +130,16 @@ public: UInt64 count_participating_replicas{0}; UInt64 number_of_current_replica{0}; + enum class BackgroundOperationType : uint8_t + { + NOT_A_BACKGROUND_OPERATION = 0, + MERGE = 1, + MUTATION = 2, + }; + + /// It's ClientInfo and context created for background operation (not real query) + BackgroundOperationType background_operation_type{BackgroundOperationType::NOT_A_BACKGROUND_OPERATION}; + bool empty() const { return query_kind == QueryKind::NO_QUERY; } /** Serialization and deserialization. 
diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index b15f0e4b94e..59c98491c14 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -392,10 +392,10 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, config_prefix += "."; secret = config.getString(config_prefix + "secret", ""); - boost::range::remove_erase(config_keys, "secret"); + std::erase(config_keys, "secret"); allow_distributed_ddl_queries = config.getBool(config_prefix + "allow_distributed_ddl_queries", true); - boost::range::remove_erase(config_keys, "allow_distributed_ddl_queries"); + std::erase(config_keys, "allow_distributed_ddl_queries"); if (config_keys.empty()) throw Exception(ErrorCodes::SHARD_HAS_NO_CONNECTIONS, "No cluster elements (shard, node) specified in config at path {}", config_prefix); diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index d432488964d..6f9c375c2f5 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index f7727f70ff7..13e6fa87051 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -38,7 +38,8 @@ namespace ErrorCodes namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, +ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, + bool is_remote_function, ContextPtr context, const Settings & settings, const StorageID & main_table, @@ -46,9 +47,17 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, LoggerPtr log, const DistributedSettings * distributed_settings) { + ClientInfo new_client_info = context->getClientInfo(); Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); + /// In case of interserver mode we should reset initial_user for remote() function to use passed user from the query. + if (is_remote_function) + { + const auto & address = cluster.getShardsAddresses().front().front(); + new_client_info.initial_user = address.user; + } + /// If "secret" (in remote_servers) is not in use, /// user on the shard is not the same as the user on the initiator, /// hence per-user limits should not be applied. 
@@ -168,9 +177,23 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); + new_context->setClientInfo(new_client_info); return new_context; } +ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table) +{ + return updateSettingsAndClientInfoForCluster(cluster, + /* is_remote_function= */ false, + context, + settings, + main_table, + /* additional_filter_ast= */ {}, + /* log= */ {}, + /* distributed_settings= */ {}); +} + + static ThrottlerPtr getThrottler(const ContextPtr & context) { const Settings & settings = context->getSettingsRef(); @@ -209,7 +232,8 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator) + AdditionalShardFilterGenerator shard_filter_generator, + bool is_remote_function) { const Settings & settings = context->getSettingsRef(); @@ -222,8 +246,8 @@ void executeQuery( SelectStreamFactory::Shards remote_shards; auto cluster = query_info.getCluster(); - auto new_context = updateSettingsForCluster(*cluster, context, settings, main_table, query_info.additional_filter_ast, log, - &distributed_settings); + auto new_context = updateSettingsAndClientInfoForCluster(*cluster, is_remote_function, context, + settings, main_table, query_info.additional_filter_ast, log, &distributed_settings); if (context->getSettingsRef().allow_experimental_parallel_reading_from_replicas && context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value != new_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value) @@ -372,11 +396,17 @@ void executeQuery( void executeQueryWithParallelReplicas( QueryPlan & query_plan, - SelectStreamFactory & stream_factory, + const StorageID & storage_id, + const Block & header, + QueryProcessingStage::Enum processed_stage, const ASTPtr & query_ast, ContextPtr context, std::shared_ptr storage_limits) { + auto logger = getLogger("executeQueryWithParallelReplicas"); + LOG_DEBUG(logger, "Executing read from {}, header {}, query ({}), stage {} with parallel replicas", + storage_id.getNameForLogs(), header.dumpStructure(), query_ast->formatForLogging(), processed_stage); + const auto & settings = context->getSettingsRef(); /// check cluster for parallel replicas @@ -461,9 +491,10 @@ void executeQueryWithParallelReplicas( auto read_from_remote = std::make_unique( query_ast, new_cluster, + storage_id, std::move(coordinator), - stream_factory.header, - stream_factory.processed_stage, + header, + processed_stage, new_context, getThrottler(new_context), std::move(scalars), diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 8f6f6300c7b..284fea05135 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -38,13 +38,7 @@ class SelectStreamFactory; /// - optimize_skip_unused_shards_nesting /// /// @return new Context with adjusted settings -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, - ContextPtr context, - const Settings & settings, - const StorageID & main_table, - ASTPtr additional_filter_ast = nullptr, - LoggerPtr log = nullptr, - const DistributedSettings * distributed_settings = nullptr); +ContextMutablePtr updateSettingsForCluster(const 
Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table); using AdditionalShardFilterGenerator = std::function; /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. @@ -63,12 +57,15 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator); + AdditionalShardFilterGenerator shard_filter_generator, + bool is_remote_function); void executeQueryWithParallelReplicas( QueryPlan & query_plan, - SelectStreamFactory & stream_factory, + const StorageID & storage_id, + const Block & header, + QueryProcessingStage::Enum processed_stage, const ASTPtr & query_ast, ContextPtr context, std::shared_ptr storage_limits); diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index ff10db3beb5..5dda33a8b98 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -17,7 +17,7 @@ class TableJoin; namespace ASOF { - enum class Inequality; + enum class Inequality : uint8_t; } namespace ErrorCodes @@ -25,7 +25,7 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; } -enum class JoinIdentifierPos +enum class JoinIdentifierPos : uint8_t { /// Position can't be established, identifier not resolved Unknown, diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index 4eacbae7a30..d53ff4b0227 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -309,7 +309,6 @@ ComparisonGraphCompareResult ComparisonGraph::pathToCompareResult(Path pat case Path::GREATER: return inverse ? ComparisonGraphCompareResult::LESS : ComparisonGraphCompareResult::GREATER; case Path::GREATER_OR_EQUAL: return inverse ? 
ComparisonGraphCompareResult::LESS_OR_EQUAL : ComparisonGraphCompareResult::GREATER_OR_EQUAL; } - UNREACHABLE(); } template diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 200d4af38fc..e1c436a7e43 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -154,7 +154,7 @@ private: const Graph & reversed_graph, size_t v, OptionalIndices & components, size_t component); - enum class Path + enum class Path : uint8_t { GREATER, GREATER_OR_EQUAL, diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 96be70c5527..53987694e46 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -1,10 +1,9 @@ -#include -#include #include #include #include #include #include +#include #include #include #include @@ -15,10 +14,20 @@ #include #include #include +#include #include +#include #include +#include +#include #include -#include + +namespace CurrentMetrics +{ +extern const Metric ConcurrentHashJoinPoolThreads; +extern const Metric ConcurrentHashJoinPoolThreadsActive; +extern const Metric ConcurrentHashJoinPoolThreadsScheduled; +} namespace DB { @@ -36,20 +45,82 @@ static UInt32 toPowerOfTwo(UInt32 x) return static_cast(1) << (32 - std::countl_zero(x - 1)); } -ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) +ConcurrentHashJoin::ConcurrentHashJoin( + ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) : context(context_) , table_join(table_join_) , slots(toPowerOfTwo(std::min(static_cast(slots_), 256))) + , pool(std::make_unique( + CurrentMetrics::ConcurrentHashJoinPoolThreads, + CurrentMetrics::ConcurrentHashJoinPoolThreadsActive, + CurrentMetrics::ConcurrentHashJoinPoolThreadsScheduled, + slots)) { - for (size_t i = 0; i < slots; ++i) - { - auto inner_hash_join = std::make_shared(); + hash_joins.resize(slots); - inner_hash_join->data = std::make_unique(table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", i)); - /// Non zero `max_joined_block_rows` allows to process block partially and return not processed part. - /// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature. - inner_hash_join->data->setMaxJoinedBlockRows(0); - hash_joins.emplace_back(std::move(inner_hash_join)); + try + { + for (size_t i = 0; i < slots; ++i) + { + pool->scheduleOrThrow( + [&, idx = i, thread_group = CurrentThread::getGroup()]() + { + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + }); + + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + setThreadName("ConcurrentJoin"); + + auto inner_hash_join = std::make_shared(); + inner_hash_join->data = std::make_unique( + table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", idx)); + /// Non zero `max_joined_block_rows` allows to process block partially and return not processed part. + /// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature. + inner_hash_join->data->setMaxJoinedBlockRows(0); + hash_joins[idx] = std::move(inner_hash_join); + }); + } + pool->wait(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + pool->wait(); + throw; + } +} + +ConcurrentHashJoin::~ConcurrentHashJoin() +{ + try + { + for (size_t i = 0; i < slots; ++i) + { + // Hash tables destruction may be very time-consuming. + // Without the following code, they would be destroyed in the current thread (i.e. sequentially). + // `InternalHashJoin` is moved here and will be destroyed in the destructor of the lambda function. + pool->scheduleOrThrow( + [join = std::move(hash_joins[i]), thread_group = CurrentThread::getGroup()]() + { + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + }); + + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + setThreadName("ConcurrentJoin"); + }); + } + pool->wait(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + pool->wait(); } } diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 40796376d23..c797ff27ece 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB { @@ -39,7 +40,7 @@ public: const Block & right_sample_block, bool any_take_last_row_ = false); - ~ConcurrentHashJoin() override = default; + ~ConcurrentHashJoin() override; std::string getName() const override { return "ConcurrentHashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } @@ -66,6 +67,7 @@ private: ContextPtr context; std::shared_ptr table_join; size_t slots; + std::unique_ptr pool; std::vector> hash_joins; std::mutex totals_mutex; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 726b480930a..5c9ae4716b9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,8 @@ namespace CurrentMetrics extern const Metric TablesLoaderForegroundThreadsScheduled; extern const Metric IOWriterThreadsScheduled; extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; extern const Metric AttachedDatabase; extern const Metric PartsActive; } @@ -358,6 +361,8 @@ struct ContextSharedPart : boost::noncopyable /// No lock required for format_schema_path modified only during initialization std::atomic_size_t max_database_num_to_warn = 1000lu; std::atomic_size_t max_table_num_to_warn = 5000lu; + std::atomic_size_t max_view_num_to_warn = 10000lu; + std::atomic_size_t max_dictionary_num_to_warn = 1000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; String format_schema_path; /// Path to a directory that contains schema files used by input formats. String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. 
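The ConcurrentHashJoin hunks above parallelise both construction and destruction of the per-slot hash tables on a dedicated ThreadPool. The destruction part is a generic pattern: move the owning pointer into the scheduled lambda, so the object's potentially expensive destructor runs on a worker thread instead of the thread running ~ConcurrentHashJoin(). A minimal sketch of that idea, using std::async rather than ClickHouse's ThreadPool and an illustrative HeavyJoinSlot type:

#include <future>
#include <memory>
#include <vector>

struct HeavyJoinSlot
{
    std::vector<int> buckets = std::vector<int>(1 << 20);
    ~HeavyJoinSlot() = default;   /// imagine an expensive teardown here
};

int main()
{
    std::vector<std::shared_ptr<HeavyJoinSlot>> slots(8);
    for (auto & slot : slots)
        slot = std::make_shared<HeavyJoinSlot>();

    std::vector<std::future<void>> tasks;
    for (auto & slot : slots)
    {
        /// Moving the shared_ptr into the lambda makes the task the last owner,
        /// so the destructor runs inside the task, off the calling thread.
        tasks.emplace_back(std::async(std::launch::async, [moved = std::move(slot)]() mutable
        {
            moved.reset();   /// ~HeavyJoinSlot() executes here, on the worker thread
        }));
    }

    for (auto & task : tasks)
        task.wait();
    return 0;
}

The real code additionally attaches each task to the current thread group, names the threads, and waits on the pool if scheduling throws; the sketch omits those details.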
@@ -934,6 +939,10 @@ Strings Context::getWarnings() const common_warnings = shared->warnings; if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast(shared->max_table_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedView) > static_cast(shared->max_view_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached views is more than {}", shared->max_view_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedDictionary) > static_cast(shared->max_dictionary_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}", shared->max_dictionary_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast(shared->max_database_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_database_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast(shared->max_part_num_to_warn)) @@ -1044,29 +1053,30 @@ try { LOG_DEBUG(log, "Setting up {} to store temporary data in it", path); - fs::create_directories(path); - - /// Clearing old temporary files. - fs::directory_iterator dir_end; - for (fs::directory_iterator it(path); it != dir_end; ++it) + if (fs::exists(path)) { - if (it->is_regular_file()) + /// Clearing old temporary files. + fs::directory_iterator dir_end; + for (fs::directory_iterator it(path); it != dir_end; ++it) { - if (startsWith(it->path().filename(), "tmp")) + if (it->is_regular_file()) { - LOG_DEBUG(log, "Removing old temporary file {}", it->path().string()); - fs::remove(it->path()); + if (startsWith(it->path().filename(), "tmp")) + { + LOG_DEBUG(log, "Removing old temporary file {}", it->path().string()); + fs::remove(it->path()); + } + else + LOG_DEBUG(log, "Found unknown file in temporary path {}", it->path().string()); } - else - LOG_DEBUG(log, "Found unknown file in temporary path {}", it->path().string()); + /// We skip directories (for example, 'http_buffers' - it's used for buffering of the results) and all other file types. } - /// We skip directories (for example, 'http_buffers' - it's used for buffering of the results) and all other file types. } } catch (...) { DB::tryLogCurrentException(log, fmt::format( - "Caught exception while setup temporary path: {}. " + "Caught exception while setting up temporary path: {}. " "It is ok to skip this exception as cleaning old temporary files is not necessary", path)); } @@ -1091,9 +1101,7 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size) VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, shared->getConfigRefWithLock(lock)); for (const auto & disk : volume->getDisks()) - { setupTmpPath(shared->log, disk->getPath()); - } TemporaryDataOnDiskSettings temporary_data_on_disk_settings; temporary_data_on_disk_settings.max_size_on_disk = max_size; @@ -1395,18 +1403,18 @@ void Context::checkAccessImpl(const Args &... 
args) const return getAccess()->checkAccess(args...); } -void Context::checkAccess(const AccessFlags & flags) const { return checkAccessImpl(flags); } -void Context::checkAccess(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); } -void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); } -void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); } -void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(flags, database, table, columns); } -void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); } +void Context::checkAccess(const AccessFlags & flags) const { checkAccessImpl(flags); } +void Context::checkAccess(const AccessFlags & flags, std::string_view database) const { checkAccessImpl(flags, database); } +void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(flags, database, table); } +void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(flags, database, table, column); } +void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } +void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName()); } void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, std::string_view column) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), column); } void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const std::vector & columns) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns); } void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const Strings & columns) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns); } -void Context::checkAccess(const AccessRightsElement & element) const { return checkAccessImpl(element); } -void Context::checkAccess(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } +void Context::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } +void Context::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } std::shared_ptr Context::getAccess() const { @@ -1608,6 +1616,21 @@ Tables Context::getExternalTables() const void Context::addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +{ + addExternalTable(table_name, std::make_shared(std::move(temporary_table))); +} + +void Context::updateExternalTable(const String & table_name, 
TemporaryTableHolder && temporary_table) +{ + updateExternalTable(table_name, std::make_shared(std::move(temporary_table))); +} + +void Context::addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +{ + addOrUpdateExternalTable(table_name, std::make_shared(std::move(temporary_table))); +} + +void Context::addExternalTable(const String & table_name, std::shared_ptr temporary_table) { if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); @@ -1615,34 +1638,32 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder & std::lock_guard lock(mutex); if (external_tables_mapping.end() != external_tables_mapping.find(table_name)) throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists", backQuoteIfNeed(table_name)); - external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); + + external_tables_mapping.emplace(table_name, std::move(temporary_table)); } -void Context::updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +void Context::updateExternalTable(const String & table_name, std::shared_ptr temporary_table) { if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); - auto temporary_table_ptr = std::make_shared(std::move(temporary_table)); - std::lock_guard lock(mutex); auto it = external_tables_mapping.find(table_name); if (it == external_tables_mapping.end()) - throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} does not exists", backQuoteIfNeed(table_name)); - it->second = std::move(temporary_table_ptr); + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} does not exist", backQuoteIfNeed(table_name)); + + it->second = std::move(temporary_table); } -void Context::addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +void Context::addOrUpdateExternalTable(const String & table_name, std::shared_ptr temporary_table) { if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); - auto temporary_table_ptr = std::make_shared(std::move(temporary_table)); - std::lock_guard lock(mutex); - auto [it, inserted] = external_tables_mapping.emplace(table_name, temporary_table_ptr); + auto [it, inserted] = external_tables_mapping.emplace(table_name, temporary_table); if (!inserted) - it->second = std::move(temporary_table_ptr); + it->second = std::move(temporary_table); } std::shared_ptr Context::findExternalTable(const String & table_name) const @@ -2365,6 +2386,17 @@ void Context::setCurrentQueryId(const String & query_id) client_info.initial_query_id = client_info.current_query_id; } +void Context::setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType background_operation) +{ + chassert(background_operation != ClientInfo::BackgroundOperationType::NOT_A_BACKGROUND_OPERATION); + client_info.background_operation_type = background_operation; +} + +bool Context::isBackgroundOperationContext() const +{ + return client_info.background_operation_type != ClientInfo::BackgroundOperationType::NOT_A_BACKGROUND_OPERATION; +} + void Context::killCurrentQuery() const { if (auto elem = getProcessListElement()) @@ -2499,7 +2531,7 @@ AsyncLoader & Context::getAsyncLoader() const shared->async_loader = std::make_unique(std::vector{ // IMPORTANT: Pool declaration order should match the order in `PoolId.h` to 
get the indices right. { // TablesLoaderForegroundPoolId - "FgLoad", + "ForegroundLoad", CurrentMetrics::TablesLoaderForegroundThreads, CurrentMetrics::TablesLoaderForegroundThreadsActive, CurrentMetrics::TablesLoaderForegroundThreadsScheduled, @@ -2507,7 +2539,7 @@ AsyncLoader & Context::getAsyncLoader() const TablesLoaderForegroundPriority }, { // TablesLoaderBackgroundLoadPoolId - "BgLoad", + "BackgroundLoad", CurrentMetrics::TablesLoaderBackgroundThreads, CurrentMetrics::TablesLoaderBackgroundThreadsActive, CurrentMetrics::TablesLoaderBackgroundThreadsScheduled, @@ -2515,7 +2547,7 @@ AsyncLoader & Context::getAsyncLoader() const TablesLoaderBackgroundLoadPriority }, { // TablesLoaderBackgroundStartupPoolId - "BgStartup", + "BackgrndStartup", CurrentMetrics::TablesLoaderBackgroundThreads, CurrentMetrics::TablesLoaderBackgroundThreadsActive, CurrentMetrics::TablesLoaderBackgroundThreadsScheduled, @@ -3698,6 +3730,18 @@ void Context::setMaxTableNumToWarn(size_t max_table_to_warn) shared->max_table_num_to_warn= max_table_to_warn; } +void Context::setMaxViewNumToWarn(size_t max_view_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_view_num_to_warn= max_view_to_warn; +} + +void Context::setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_dictionary_num_to_warn= max_dictionary_to_warn; +} + void Context::setMaxDatabaseNumToWarn(size_t max_database_to_warn) { SharedLockGuard lock(shared->mutex); @@ -4468,7 +4512,7 @@ void Context::setApplicationType(ApplicationType type) /// Lock isn't required, you should set it at start shared->application_type = type; - if (type == ApplicationType::LOCAL || type == ApplicationType::SERVER) + if (type == ApplicationType::LOCAL || type == ApplicationType::SERVER || type == ApplicationType::DISKS) shared->server_settings.loadSettingsFromConfig(Poco::Util::Application::instance().config()); if (type == ApplicationType::SERVER) @@ -5176,10 +5220,10 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co } #if USE_LIBURING -IOUringReader & Context::getIOURingReader() const +IOUringReader & Context::getIOUringReader() const { callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = std::make_unique(512); + shared->io_uring_reader = createIOUringReader(); }); return *shared->io_uring_reader; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d1ff5b4c2b2..87a7baa0469 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -61,7 +61,7 @@ struct QuotaUsage; class AccessFlags; struct AccessRightsElement; class AccessRightsElements; -enum class RowPolicyFilterType; +enum class RowPolicyFilterType : uint8_t; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalUserDefinedExecutableFunctionsLoader; @@ -685,6 +685,9 @@ public: void addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table); void updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table); void addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table); + void addExternalTable(const String & table_name, std::shared_ptr temporary_table); + void updateExternalTable(const String & table_name, std::shared_ptr temporary_table); + void addOrUpdateExternalTable(const String & table_name, std::shared_ptr temporary_table); std::shared_ptr findExternalTable(const String & table_name) const; std::shared_ptr removeExternalTable(const String 
& table_name); @@ -718,7 +721,7 @@ public: void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name); /// Supported factories for records in query_log - enum class QueryLogFactories + enum class QueryLogFactories : uint8_t { AggregateFunction, AggregateFunctionCombinator, @@ -757,6 +760,12 @@ public: void setCurrentDatabaseNameInGlobalContext(const String & name); void setCurrentQueryId(const String & query_id); + /// FIXME: for background operations (like Merge and Mutation) we also use the same Context object and even setup + /// query_id for it (table_uuid::result_part_name). We can distinguish queries from background operation in some way like + /// bool is_background = query_id.contains("::"), but it's much worse than just enum check with more clear purpose + void setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType background_operation); + bool isBackgroundOperationContext() const; + void killCurrentQuery() const; bool isCurrentQueryKilled() const; @@ -858,6 +867,8 @@ public: const HTTPHeaderFilter & getHTTPHeaderFilter() const; void setMaxTableNumToWarn(size_t max_table_to_warn); + void setMaxViewNumToWarn(size_t max_view_to_warn); + void setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn); void setMaxDatabaseNumToWarn(size_t max_database_to_warn); void setMaxPartNumToWarn(size_t max_part_to_warn); /// The port that the server listens for executing SQL queries. @@ -1155,7 +1166,7 @@ public: ActionLocksManagerPtr getActionLocksManager() const; - enum class ApplicationType + enum class ApplicationType : uint8_t { SERVER, /// The program is run as clickhouse-server daemon (default behavior) CLIENT, /// clickhouse-client @@ -1243,7 +1254,7 @@ public: IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING - IOUringReader & getIOURingReader() const; + IOUringReader & getIOUringReader() const; #endif std::shared_ptr getAsyncReadCounters() const; @@ -1387,11 +1398,6 @@ struct HTTPContext : public IHTTPContext return context->getSettingsRef().http_max_field_value_size; } - uint64_t getMaxChunkSize() const override - { - return context->getSettingsRef().http_max_chunk_size; - } - Poco::Timespan getReceiveTimeout() const override { return context->getSettingsRef().http_receive_timeout; diff --git a/src/Interpreters/Context_fwd.h b/src/Interpreters/Context_fwd.h index c7928bbdbf3..d69ea6bc135 100644 --- a/src/Interpreters/Context_fwd.h +++ b/src/Interpreters/Context_fwd.h @@ -36,7 +36,7 @@ struct WithContextImpl WithContextImpl() = default; explicit WithContextImpl(Weak context_) : context(context_) {} - inline Shared getContext() const + Shared getContext() const { auto ptr = context.lock(); if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Context has expired"); diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 5a8a5bfb184..0b0460b26c8 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -133,10 +133,10 @@ struct DDLTaskBase virtual void createSyncedNodeIfNeed(const ZooKeeperPtr & /*zookeeper*/) {} - inline String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } - inline String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } - inline String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } - inline String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } + String 
getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } + String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } + String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } + String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } static String getLogEntryName(UInt32 log_entry_number); static UInt32 getLogEntryNumber(const String & log_entry_name); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index fd807d54eff..5639eed552e 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -569,7 +569,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__, task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); - tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER; + tracing_ctx_holder.root_span.kind = OpenTelemetry::SpanKind::CONSUMER; String active_node_path = task.getActiveNodePath(); String finished_node_path = task.getFinishedNodePath(); @@ -676,7 +676,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) if (task.execution_status.code != 0) { bool status_written_by_table_or_db = task.ops.empty(); - if (status_written_by_table_or_db) + bool is_replicated_database_task = dynamic_cast(&task); + if (status_written_by_table_or_db || is_replicated_database_task) { throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.message); } @@ -710,6 +711,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) task.createSyncedNodeIfNeed(zookeeper); updateMaxDDLEntryID(task.entry_name); task.completely_processed = true; + subsequent_errors_count = 0; } @@ -791,6 +793,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( bool executed_by_us = false; bool executed_by_other_leader = false; + bool extra_attempt_for_replicated_database = false; + /// Defensive programming. One hour is more than enough to execute almost all DDL queries. /// If it will be very long query like ALTER DELETE for a huge table it's still will be executed, /// but DDL worker can continue processing other queries. @@ -835,7 +839,14 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( /// Checking and incrementing counter exclusively. 
size_t counter = parse(zookeeper->get(tries_to_execute_path)); if (counter > MAX_TRIES_TO_EXECUTE) - break; + { + /// Replicated databases have their own retries, limiting retries here would break outer retries + bool is_replicated_database_task = dynamic_cast(&task); + if (is_replicated_database_task) + extra_attempt_for_replicated_database = true; + else + break; + } zookeeper->set(tries_to_execute_path, toString(counter + 1)); @@ -849,6 +860,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( executed_by_us = true; break; } + else if (extra_attempt_for_replicated_database) + break; } /// Waiting for someone who will execute query and change is_executed_path node @@ -892,7 +905,9 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( else /// If we exceeded amount of tries { LOG_WARNING(log, "Task {} was not executed by anyone, maximum number of retries exceeded", task.entry_name); - task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, maximum retries exceeded"); + bool keep_original_error = extra_attempt_for_replicated_database && task.execution_status.code; + if (!keep_original_error) + task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, maximum retries exceeded"); } return false; } @@ -922,7 +937,6 @@ void DDLWorker::cleanupQueue(Int64, const ZooKeeperPtr & zookeeper) String node_path = fs::path(queue_dir) / node_name; Coordination::Stat stat; - String dummy; try { @@ -1145,12 +1159,14 @@ void DDLWorker::runMainThread() cleanup_event->set(); scheduleTasks(reinitialized); + subsequent_errors_count = 0; LOG_DEBUG(log, "Waiting for queue updates"); queue_updated_event->wait(); } catch (const Coordination::Exception & e) { + subsequent_errors_count = 0; if (Coordination::isHardwareError(e.code)) { initialized = false; @@ -1168,9 +1184,32 @@ void DDLWorker::runMainThread() } catch (...) 
{ - tryLogCurrentException(log, "Unexpected error, will try to restart main thread"); - reset_state(); + String message = getCurrentExceptionMessage(/*with_stacktrace*/ true); + if (subsequent_errors_count) + { + if (last_unexpected_error == message) + { + ++subsequent_errors_count; + } + else + { + subsequent_errors_count = 1; + last_unexpected_error = message; + } + } + else + { + subsequent_errors_count = 1; + last_unexpected_error = message; + } + + LOG_ERROR(log, "Unexpected error ({} times in a row), will try to restart main thread: {}", subsequent_errors_count, message); + + /// Sleep before retrying sleepForSeconds(5); + /// Reset state after sleeping, so DatabaseReplicated::canExecuteReplicatedMetadataAlter() + /// will have a chance even when the database got stuck in infinite retries + reset_state(); } } } diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index 9eb6606e73c..6d1dabda54f 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -194,6 +194,9 @@ protected: ConcurrentSet entries_to_skip; + std::atomic_uint64_t subsequent_errors_count = 0; + String last_unexpected_error; + const CurrentMetrics::Metric * max_entry_metric; const CurrentMetrics::Metric * max_pushed_entry_metric; }; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0caca88c283..0f4c8cc26a6 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -988,7 +988,7 @@ void DatabaseCatalog::loadMarkedAsDroppedTables() /// we should load them and enqueue cleanup to remove data from store/ and metadata from ZooKeeper std::map dropped_metadata; - String path = getContext()->getPath() + "metadata_dropped/"; + String path = std::filesystem::path(getContext()->getPath()) / "metadata_dropped" / ""; if (!std::filesystem::exists(path)) { @@ -1043,10 +1043,11 @@ void DatabaseCatalog::loadMarkedAsDroppedTables() String DatabaseCatalog::getPathForDroppedMetadata(const StorageID & table_id) const { - return getContext()->getPath() + "metadata_dropped/" + - escapeForFileName(table_id.getDatabaseName()) + "." + - escapeForFileName(table_id.getTableName()) + "." 
+ - toString(table_id.uuid) + ".sql"; + return std::filesystem::path(getContext()->getPath()) / "metadata_dropped" / + fmt::format("{}.{}.{}.sql", + escapeForFileName(table_id.getDatabaseName()), + escapeForFileName(table_id.getTableName()), + toString(table_id.uuid)); } String DatabaseCatalog::getPathForMetadata(const StorageID & table_id) const @@ -1437,7 +1438,7 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamed( if (!check_referential_dependencies && !check_loading_dependencies) return; std::lock_guard lock{databases_mutex}; - return checkTableCanBeRemovedOrRenamedUnlocked(table_id, check_referential_dependencies, check_loading_dependencies, is_drop_database); + checkTableCanBeRemovedOrRenamedUnlocked(table_id, check_referential_dependencies, check_loading_dependencies, is_drop_database); } void DatabaseCatalog::checkTableCanBeRemovedOrRenamedUnlocked( diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 5caa034e0e9..37125d9900c 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -284,7 +284,7 @@ private: static constexpr UInt64 bits_for_first_level = 4; using UUIDToStorageMap = std::array; - static inline size_t getFirstLevelIdx(const UUID & uuid) + static size_t getFirstLevelIdx(const UUID & uuid) { return UUIDHelpers::getHighBytes(uuid) >> (64 - bits_for_first_level); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index fefbd67bfc1..d80d5cd5b93 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include @@ -336,7 +336,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. if (!aggregate_descriptions.empty() || group_size > 1) { - if (j + 1 < static_cast(group_size)) + if (j + 1 < group_size) group_elements_ast[j] = std::move(group_elements_ast.back()); group_elements_ast.pop_back(); @@ -390,7 +390,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. 
if (!aggregate_descriptions.empty() || size > 1) { - if (i + 1 < static_cast(size)) + if (i + 1 < size) group_asts[i] = std::move(group_asts.back()); group_asts.pop_back(); diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index f48ee61dab8..1685c06d387 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -77,21 +77,23 @@ void ExternalDictionariesLoader::updateObjectFromConfigWithoutReloading(IExterna ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::getDictionary(const std::string & dictionary_name, ContextPtr local_context) const { std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); + auto dictionary = std::static_pointer_cast(load(resolved_dictionary_name)); if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, dictionary->getQualifiedName()); - return std::static_pointer_cast(load(resolved_dictionary_name)); + return dictionary; } ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::tryGetDictionary(const std::string & dictionary_name, ContextPtr local_context) const { std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); + auto dictionary = std::static_pointer_cast(tryLoad(resolved_dictionary_name)); - if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries && dictionary) + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, dictionary->getQualifiedName()); - return std::static_pointer_cast(tryLoad(resolved_dictionary_name)); + return dictionary; } diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index a636e59fa1a..96405f35f3f 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ExternalLoader.h b/src/Interpreters/ExternalLoader.h index c746ce39b2d..49b5e68d821 100644 --- a/src/Interpreters/ExternalLoader.h +++ b/src/Interpreters/ExternalLoader.h @@ -110,17 +110,17 @@ public: /// Returns the result of loading the object. /// The function doesn't load anything, it just returns the current load result as is. - template , void>> + template , void>> // NOLINT ReturnType getLoadResult(const String & name) const; using FilterByNameFunction = std::function; /// Returns all the load results as a map. /// The function doesn't load anything, it just returns the current load results as is. - template , void>> + template , void>> // NOLINT ReturnType getLoadResults() const { return getLoadResults(FilterByNameFunction{}); } - template , void>> + template , void>> // NOLINT ReturnType getLoadResults(const FilterByNameFunction & filter) const; /// Returns all loaded objects as a map. @@ -144,59 +144,59 @@ public: /// Loads a specified object. /// The function does nothing if it's already loaded. 
/// The function doesn't throw an exception if it's failed to load. - template , void>> + template , void>> // NOLINT ReturnType tryLoad(const String & name, Duration timeout = WAIT) const; /// Loads objects by filter. /// The function does nothing for already loaded objects, it just returns them. /// The function doesn't throw an exception if it's failed to load something. - template , void>> + template , void>> // NOLINT ReturnType tryLoad(const FilterByNameFunction & filter, Duration timeout = WAIT) const; /// Loads all objects. /// The function does nothing for already loaded objects, it just returns them. /// The function doesn't throw an exception if it's failed to load something. - template , void>> + template , void>> // NOLINT ReturnType tryLoadAll(Duration timeout = WAIT) const { return tryLoad(FilterByNameFunction{}, timeout); } /// Loads a specified object. /// The function does nothing if it's already loaded. /// The function throws an exception if it's failed to load. - template , void>> + template , void>> // NOLINT ReturnType load(const String & name) const; /// Loads objects by filter. /// The function does nothing for already loaded objects, it just returns them. /// The function throws an exception if it's failed to load something. - template , void>> + template , void>> // NOLINT ReturnType load(const FilterByNameFunction & filter) const; /// Loads all objects. Not recommended to use. /// The function does nothing for already loaded objects, it just returns them. /// The function throws an exception if it's failed to load something. - template , void>> + template , void>> // NOLINT ReturnType loadAll() const { return load(FilterByNameFunction{}); } /// Loads or reloads a specified object. /// The function reloads the object if it's already loaded. /// The function throws an exception if it's failed to load or reload. - template , void>> + template , void>> // NOLINT ReturnType loadOrReload(const String & name) const; /// Loads or reloads objects by filter. /// The function reloads the objects which are already loaded. /// The function throws an exception if it's failed to load or reload something. - template , void>> + template , void>> // NOLINT ReturnType loadOrReload(const FilterByNameFunction & filter) const; /// Load or reloads all objects. Not recommended to use. /// The function throws an exception if it's failed to load or reload something. - template , void>> + template , void>> // NOLINT ReturnType loadOrReloadAll() const { return loadOrReload(FilterByNameFunction{}); } /// Reloads objects by filter which were tried to load before (successfully or not). /// The function throws an exception if it's failed to load or reload something. 
- template , void>> + template , void>> // NOLINT ReturnType reloadAllTriedToLoad() const; /// Check if object with name exists in configuration diff --git a/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp b/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp index a15f918f457..e404797501d 100644 --- a/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp +++ b/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp @@ -2,7 +2,7 @@ #include -#include +#include #include #include #include diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 80fe1c3a8ef..90756f1c84a 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -15,18 +15,7 @@ namespace DB static String typeToString(FilesystemCacheLogElement::CacheType type) { - switch (type) - { - case FilesystemCacheLogElement::CacheType::READ_FROM_CACHE: - return "READ_FROM_CACHE"; - case FilesystemCacheLogElement::CacheType::READ_FROM_FS_AND_DOWNLOADED_TO_CACHE: - return "READ_FROM_FS_AND_DOWNLOADED_TO_CACHE"; - case FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE: - return "READ_FROM_FS_BYPASSING_CACHE"; - case FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE: - return "WRITE_THROUGH_CACHE"; - } - UNREACHABLE(); + return String(magic_enum::enum_name(type)); } ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index c83ac4e62fd..27c616ff40c 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -15,7 +15,7 @@ namespace DB struct FilesystemCacheLogElement { - enum class CacheType + enum class CacheType : uint8_t { READ_FROM_CACHE, READ_FROM_FS_AND_DOWNLOADED_TO_CACHE, diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.h b/src/Interpreters/FilesystemReadPrefetchesLog.h index 088613cbeac..b3eae87893b 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.h +++ b/src/Interpreters/FilesystemReadPrefetchesLog.h @@ -10,7 +10,7 @@ namespace DB { -enum class FilesystemPrefetchState +enum class FilesystemPrefetchState : uint8_t { USED, CANCELLED_WITH_SEEK, diff --git a/src/Interpreters/GinFilter.cpp b/src/Interpreters/GinFilter.cpp index dd13a264d96..4ad0c4098d8 100644 --- a/src/Interpreters/GinFilter.cpp +++ b/src/Interpreters/GinFilter.cpp @@ -1,3 +1,5 @@ +// NOLINTBEGIN(clang-analyzer-optin.core.EnumCastOutOfRange) + #include #include #include @@ -29,7 +31,7 @@ GinFilterParameters::GinFilterParameters(size_t ngrams_, UInt64 max_rows_per_pos max_rows_per_postings_list = std::numeric_limits::max(); if (ngrams > 8) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The size of inverted index filter cannot be greater than 8"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The size of full-text index filter cannot be greater than 8"); } GinFilter::GinFilter(const GinFilterParameters & params_) @@ -174,3 +176,5 @@ bool GinFilter::match(const GinPostingsCache & postings_cache) const } } + +// NOLINTEND(clang-analyzer-optin.core.EnumCastOutOfRange) diff --git a/src/Interpreters/GinFilter.h b/src/Interpreters/GinFilter.h index 7924bd4ce3d..e7fd80cddd3 100644 --- a/src/Interpreters/GinFilter.h +++ b/src/Interpreters/GinFilter.h @@ -2,11 +2,11 @@ #include #include -#include namespace DB { +static inline constexpr auto FULL_TEXT_INDEX_NAME = "full_text"; static inline constexpr auto INVERTED_INDEX_NAME = "inverted"; static inline constexpr UInt64 UNLIMITED_ROWS_PER_POSTINGS_LIST 
= 0; static inline constexpr UInt64 MIN_ROWS_PER_POSTINGS_LIST = 8 * 1024; @@ -34,7 +34,7 @@ struct GinSegmentWithRowIdRange using GinSegmentWithRowIdRangeVector = std::vector; -/// GinFilter provides underlying functionalities for building inverted index and also +/// GinFilter provides underlying functionalities for building full-text index and also /// it does filtering the unmatched rows according to its query string. /// It also builds and uses skipping index which stores (segmentID, RowIDStart, RowIDEnd) triples. class GinFilter @@ -44,7 +44,7 @@ public: explicit GinFilter(const GinFilterParameters & params_); /// Add term (located at 'data' with length 'len') and its row ID to the postings list builder - /// for building inverted index for the given store. + /// for building full-text index for the given store. void add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store) const; /// Accumulate (segmentID, RowIDStart, RowIDEnd) for building skipping index diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 53d1f48c291..4dd2f89b90a 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -108,7 +108,7 @@ namespace class GraceHashJoin::FileBucket : boost::noncopyable { - enum class State : int + enum class State : uint8_t { WRITING_BLOCKS, JOINING_BLOCKS, @@ -415,7 +415,7 @@ void GraceHashJoin::addBuckets(const size_t bucket_count) void GraceHashJoin::checkTypesOfKeys(const Block & block) const { chassert(hash_join); - return hash_join->checkTypesOfKeys(block); + hash_join->checkTypesOfKeys(block); } void GraceHashJoin::initialize(const Block & sample_block) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 0c759d381d7..75da8bbc3e7 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -35,10 +35,17 @@ #include #include #include +#include "Core/Joins.h" +#include "Interpreters/TemporaryDataOnDisk.h" #include #include +namespace CurrentMetrics +{ + extern const Metric TemporaryFilesForJoin; +} + namespace DB { @@ -63,6 +70,7 @@ struct NotProcessedCrossJoin : public ExtraBlock { size_t left_position; size_t right_block; + std::unique_ptr reader; }; @@ -216,7 +224,7 @@ static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nulla { /// We have to replace values masked by NULLs with defaults. if (column.column) - if (const auto * nullable_column = checkAndGetColumn(*column.column)) + if (const auto * nullable_column = checkAndGetColumn(&*column.column)) column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); JoinCommon::removeColumnNullability(column); @@ -249,6 +257,10 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , instance_id(instance_id_) , asof_inequality(table_join->getAsofInequality()) , data(std::make_shared()) + , tmp_data( + table_join_->getTempDataOnDisk() + ? std::make_unique(table_join_->getTempDataOnDisk(), CurrentMetrics::TemporaryFilesForJoin) + : nullptr) , right_sample_block(right_sample_block_) , max_joined_block_rows(table_join->maxJoinedBlockRows()) , instance_log_id(!instance_id_.empty() ? 
"(" + instance_id_ + ") " : "") @@ -693,7 +705,6 @@ namespace APPLY_FOR_JOIN_VARIANTS(M) #undef M } - UNREACHABLE(); } } @@ -827,15 +838,40 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) if (shrink_blocks) block_to_save = block_to_save.shrinkToFit(); + size_t max_bytes_in_join = table_join->sizeLimits().max_bytes; + size_t max_rows_in_join = table_join->sizeLimits().max_rows; + + if (kind == JoinKind::Cross && tmp_data + && (tmp_stream || (max_bytes_in_join && getTotalByteCount() + block_to_save.allocatedBytes() >= max_bytes_in_join) + || (max_rows_in_join && getTotalRowCount() + block_to_save.rows() >= max_rows_in_join))) + { + if (tmp_stream == nullptr) + { + tmp_stream = &tmp_data->createStream(right_sample_block); + } + tmp_stream->write(block_to_save); + return true; + } + size_t total_rows = 0; size_t total_bytes = 0; { if (storage_join_lock) throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addBlockToJoin called when HashJoin locked to prevent updates"); - data->blocks_allocated_size += block_to_save.allocatedBytes(); - assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "joined block"); + + size_t min_bytes_to_compress = table_join->crossJoinMinBytesToCompress(); + size_t min_rows_to_compress = table_join->crossJoinMinRowsToCompress(); + + if (kind == JoinKind::Cross + && ((min_bytes_to_compress && getTotalByteCount() >= min_bytes_to_compress) + || (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress))) + { + block_to_save = block_to_save.compress(); + } + + data->blocks_allocated_size += block_to_save.allocatedBytes(); data->blocks.emplace_back(std::move(block_to_save)); Block * stored_block = &data->blocks.back(); @@ -934,7 +970,6 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) shrinkStoredBlocksToFit(total_bytes); - return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } @@ -2228,11 +2263,13 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) { size_t start_left_row = 0; size_t start_right_block = 0; + std::unique_ptr reader = nullptr; if (not_processed) { auto & continuation = static_cast(*not_processed); start_left_row = continuation.left_position; start_right_block = continuation.right_block; + reader = std::move(continuation.reader); not_processed.reset(); } @@ -2261,16 +2298,12 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) size_t rows_left = block.rows(); size_t rows_added = 0; - for (size_t left_row = start_left_row; left_row < rows_left; ++left_row) { size_t block_number = 0; - for (const Block & block_right : data->blocks) - { - ++block_number; - if (block_number < start_right_block) - continue; + auto process_right_block = [&](const Block & block_right) + { size_t rows_right = block_right.rows(); rows_added += rows_right; @@ -2282,6 +2315,44 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const IColumn & column_right = *block_right.getByPosition(col_num).column; dst_columns[num_existing_columns + col_num]->insertRangeFrom(column_right, 0, rows_right); } + }; + + for (const Block & compressed_block_right : data->blocks) + { + ++block_number; + if (block_number < start_right_block) + continue; + + auto block_right = compressed_block_right.decompress(); + process_right_block(block_right); + if (rows_added > max_joined_block_rows) + { + break; + } + } + + if (tmp_stream && rows_added <= max_joined_block_rows) + { + if (reader 
== nullptr) + { + tmp_stream->finishWritingAsyncSafe(); + reader = tmp_stream->getReadStream(); + } + while (auto block_right = reader->read()) + { + ++block_number; + process_right_block(block_right); + if (rows_added > max_joined_block_rows) + { + break; + } + } + + /// It means, that reader->read() returned {} + if (rows_added <= max_joined_block_rows) + { + reader.reset(); + } } start_right_block = 0; @@ -2289,7 +2360,7 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) if (rows_added > max_joined_block_rows) { not_processed = std::make_shared( - NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1}); + NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1, std::move(reader)}); not_processed->block.swap(block); break; } @@ -2415,10 +2486,15 @@ HashJoin::~HashJoin() { if (!data) { - LOG_TRACE(log, "{}Join data has been already released", instance_log_id); + LOG_TEST(log, "{}Join data has been already released", instance_log_id); return; } - LOG_TRACE(log, "{}Join data is being destroyed, {} bytes and {} rows in hash table", instance_log_id, getTotalByteCount(), getTotalRowCount()); + LOG_TEST( + log, + "{}Join data is being destroyed, {} bytes and {} rows in hash table", + instance_log_id, + getTotalByteCount(), + getTotalRowCount()); } template @@ -2564,8 +2640,6 @@ private: default: throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); } - - UNREACHABLE(); } template diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index b7f41a7eb6b..a0996556f9a 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -26,6 +26,7 @@ #include #include +#include namespace DB { @@ -253,7 +254,7 @@ public: M(key_string) \ M(key_fixed_string) - enum class Type + enum class Type : uint8_t { EMPTY, CROSS, @@ -321,8 +322,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t getTotalByteCountImpl(Type which) const @@ -337,8 +336,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t getBufferSizeInCells(Type which) const @@ -353,8 +350,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } /// NOLINTEND(bugprone-macro-parentheses) }; @@ -442,6 +437,10 @@ private: RightTableDataPtr data; std::vector key_sizes; + /// Needed to do external cross join + TemporaryDataOnDiskPtr tmp_data; + TemporaryFileStream* tmp_stream{nullptr}; + /// Block with columns from the right-side table. Block right_sample_block; /// Block with columns from the right-side table except key columns. 
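The HashJoin changes above let CROSS JOIN spill right-side blocks to a TemporaryFileStream once max_bytes_in_join / max_rows_in_join would be exceeded, and joinBlockImplCross later replays the spilled blocks through a read stream carried in the NotProcessedCrossJoin continuation. A rough, self-contained sketch of just the spill decision, using illustrative stand-in types rather than ClickHouse's API:

#include <cstddef>
#include <vector>

/// Illustrative stand-ins for ClickHouse's Block and TemporaryFileStream.
struct Block { size_t rows = 0; size_t bytes = 0; };

struct TempStream
{
    std::vector<Block> spilled;
    void write(const Block & block) { spilled.push_back(block); }
};

struct CrossJoinRightBuffer
{
    size_t max_rows = 0;                /// 0 means "no limit", as in SizeLimits
    size_t max_bytes = 0;
    size_t total_rows = 0;
    size_t total_bytes = 0;
    std::vector<Block> in_memory;       /// analogous to data->blocks
    TempStream storage;
    TempStream * tmp_stream = nullptr;  /// created lazily on first spill, like HashJoin::tmp_stream

    void add(const Block & block)
    {
        bool over_limit = (max_bytes && total_bytes + block.bytes >= max_bytes)
                       || (max_rows && total_rows + block.rows >= max_rows);

        /// Once spilling has started, every further block goes to disk as well.
        if (tmp_stream || over_limit)
        {
            if (!tmp_stream)
                tmp_stream = &storage;
            tmp_stream->write(block);
            return;
        }

        in_memory.push_back(block);
        total_rows += block.rows;
        total_bytes += block.bytes;
    }
};

int main()
{
    CrossJoinRightBuffer buffer;
    buffer.max_bytes = 1024;
    buffer.add({100, 600});   /// kept in memory
    buffer.add({100, 600});   /// 600 + 600 >= 1024, so this block is spilled
    return buffer.tmp_stream != nullptr ? 0 : 1;
}

The "tmp_stream ||" part of the condition mirrors addBlockToJoin: after the first spill, the in-memory list stops growing and the temporary stream becomes the sink for all remaining right-side blocks.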
diff --git a/src/Interpreters/IExternalLoadable.cpp b/src/Interpreters/IExternalLoadable.cpp index 5c2df092179..484dfbe2d97 100644 --- a/src/Interpreters/IExternalLoadable.cpp +++ b/src/Interpreters/IExternalLoadable.cpp @@ -22,12 +22,8 @@ UInt64 calculateDurationWithBackoff(pcg64 & rnd_engine, size_t error_count) constexpr UInt64 backoff_initial_sec = 5; constexpr UInt64 backoff_max_sec = 10 * 60; /// 10 minutes - if (error_count < 1) - error_count = 1; - /// max seconds is 600 and 2 ** 10 == 1024 - if (error_count > 11) - error_count = 11; + error_count = std::clamp(error_count, 1, 11); std::uniform_int_distribution distribution(0, static_cast(std::exp2(error_count - 1))); return std::min(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine)); diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index a43e0fbaad1..7374348da50 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -27,7 +27,7 @@ using IBlocksStreamPtr = std::shared_ptr; class IJoin; using JoinPtr = std::shared_ptr; -enum class JoinPipelineType +enum class JoinPipelineType : uint8_t { /* * Right stream processed first, then when join data structures are ready, the left stream is processed using it. diff --git a/src/Interpreters/IKeyValueEntity.h b/src/Interpreters/IKeyValueEntity.h index 856ce28bae7..f9287e4793c 100644 --- a/src/Interpreters/IKeyValueEntity.h +++ b/src/Interpreters/IKeyValueEntity.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Interpreters/ITokenExtractor.cpp b/src/Interpreters/ITokenExtractor.cpp index 9c4027dfa0a..1c5d0d4b6d4 100644 --- a/src/Interpreters/ITokenExtractor.cpp +++ b/src/Interpreters/ITokenExtractor.cpp @@ -2,7 +2,7 @@ #include -#include +#include #include #include diff --git a/src/Interpreters/ITokenExtractor.h b/src/Interpreters/ITokenExtractor.h index fdcc9880bff..2423ef12311 100644 --- a/src/Interpreters/ITokenExtractor.h +++ b/src/Interpreters/ITokenExtractor.h @@ -32,7 +32,7 @@ struct ITokenExtractor virtual void stringPaddedToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const { - return stringToBloomFilter(data, length, bloom_filter); + stringToBloomFilter(data, length, bloom_filter); } virtual void stringLikeToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const = 0; @@ -41,7 +41,7 @@ struct ITokenExtractor virtual void stringPaddedToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const { - return stringToGinFilter(data, length, gin_filter); + stringToGinFilter(data, length, gin_filter); } virtual void stringLikeToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const = 0; diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index bddaf3ed769..df65284699f 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -25,7 +25,7 @@ struct IdentifierSemanticImpl /// Static class to manipulate IdentifierSemanticImpl via ASTIdentifier struct IdentifierSemantic { - enum class ColumnMatch + enum class ColumnMatch : uint8_t { NoMatch, ColumnName, /// column qualified with column names list diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index ae8cef3f102..4a84a7bf570 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -334,10 +334,10 @@ public: if ((columns.size() != 3 && columns.size() != 5) || column_position_to_check >= columns.size()) 
throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of columns: {}, position {}", columns.size(), column_position_to_check); - const auto * col = checkAndGetColumn(columns[column_position_to_check].get()); - for (size_t i = 0; i < col->size(); ++i) + const auto & col = checkAndGetColumn(*columns[column_position_to_check]); + for (size_t i = 0; i < col.size(); ++i) { - if (col->getElement(i) == 0) + if (col.getElement(i) == 0) { result_value = 0; return; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 29c6c101910..f2e03ca41bd 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -6,7 +6,7 @@ #include #include "Common/Exception.h" -#include +#include #include #include #include @@ -140,7 +140,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) /// Will write file with database metadata, if needed. String database_name_escaped = escapeForFileName(database_name); - fs::path metadata_path = fs::canonical(getContext()->getPath()); + fs::path metadata_path = fs::weakly_canonical(getContext()->getPath()); + fs::create_directories(metadata_path / "metadata"); fs::path metadata_file_tmp_path = metadata_path / "metadata" / (database_name_escaped + ".sql.tmp"); fs::path metadata_file_path = metadata_path / "metadata" / (database_name_escaped + ".sql"); @@ -504,7 +505,7 @@ ASTPtr InterpreterCreateQuery::formatProjections(const ProjectionsDescription & } ColumnsDescription InterpreterCreateQuery::getColumnsDescription( - const ASTExpressionList & columns_ast, ContextPtr context_, LoadingStrictnessLevel mode) + const ASTExpressionList & columns_ast, ContextPtr context_, LoadingStrictnessLevel mode, bool is_restore_from_backup) { /// First, deduce implicit types. @@ -513,7 +514,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( ASTPtr default_expr_list = std::make_shared(); NamesAndTypesList column_names_and_types; - bool make_columns_nullable = mode <= LoadingStrictnessLevel::CREATE && context_->getSettingsRef().data_type_default_nullable; + bool make_columns_nullable = mode <= LoadingStrictnessLevel::SECONDARY_CREATE && !is_restore_from_backup && context_->getSettingsRef().data_type_default_nullable; bool has_columns_with_default_without_type = false; for (const auto & ast : columns_ast.children) @@ -693,7 +694,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( res.add(std::move(column)); } - if (mode <= LoadingStrictnessLevel::CREATE && context_->getSettingsRef().flatten_nested) + if (mode <= LoadingStrictnessLevel::SECONDARY_CREATE && !is_restore_from_backup && context_->getSettingsRef().flatten_nested) res.flattenNested(); @@ -738,7 +739,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (create.columns_list->columns) { - properties.columns = getColumnsDescription(*create.columns_list->columns, getContext(), mode); + properties.columns = getColumnsDescription(*create.columns_list->columns, getContext(), mode, is_restore_from_backup); } if (create.columns_list->indices) @@ -747,15 +748,17 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti IndexDescription index_desc = IndexDescription::getIndexFromAST(index->clone(), properties.columns, getContext()); if (properties.indices.has(index_desc.name)) throw Exception(ErrorCodes::ILLEGAL_INDEX, "Duplicated index name {} is not allowed. 
Please use different index names.", backQuoteIfNeed(index_desc.name)); + const auto & settings = getContext()->getSettingsRef(); - if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index) - { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "Experimental Inverted Index feature is not enabled (the setting 'allow_experimental_inverted_index')"); - } + if (index_desc.type == FULL_TEXT_INDEX_NAME && !settings.allow_experimental_full_text_index) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental full-text index feature is not enabled (the setting 'allow_experimental_full_text_index')"); + /// ---- + /// Temporary check during a transition period. Please remove at the end of 2024. + if (index_desc.type == INVERTED_INDEX_NAME && settings.allow_experimental_inverted_index) /// The funny condition is not a mistake, see 02346_fulltext_index_old_name.sql + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'"); + /// ---- if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index) throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. Turn on allow_experimental_annoy_index"); - if (index_desc.type == "usearch" && !settings.allow_experimental_usearch_index) throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index is disabled. Turn on allow_experimental_usearch_index"); @@ -840,7 +843,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected application state. CREATE query is missing either its storage or engine."); /// We can have queries like "CREATE TABLE ENGINE=" if /// supports schema inference (will determine table structure in it's constructor). - else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) + else if (!StorageFactory::instance().getStorageFeatures(create.storage->engine->name).supports_schema_inference) throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect CREATE query: required list of column descriptions or AS section or SELECT."); /// Even if query has list of columns, canonicalize it (unfold Nested columns). @@ -979,6 +982,13 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); + if (as_create.is_materialized_view && as_create.to_table_id) + throw Exception( + ErrorCodes::INCORRECT_QUERY, + "Cannot CREATE a table AS {}, it is a Materialized View without storage. 
Use \"AS `{}`\" instead", + qualified_name, + as_create.to_table_id.getQualifiedName()); + if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); @@ -1495,7 +1505,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, validateVirtualColumns(*res); - if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + if (!res->supportsDynamicSubcolumnsDeprecated() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()) && mode <= LoadingStrictnessLevel::CREATE) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column of type Object, " diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 71bdeda05df..be4a10eaf1d 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -74,7 +74,7 @@ public: /// Obtain information about columns, their types, default values and column comments, /// for case when columns in CREATE query is specified explicitly. - static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, ContextPtr context, LoadingStrictnessLevel mode); + static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, ContextPtr context, LoadingStrictnessLevel mode, bool is_restore_from_backup = false); static ConstraintsDescription getConstraintsDescription(const ASTExpressionList * constraints); static void prepareOnClusterQuery(ASTCreateQuery & create, ContextPtr context, const String & cluster_name); diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index ee774994145..9cfb8e486cb 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } @@ -107,7 +108,19 @@ BlockIO InterpreterDeleteQuery::execute() } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); + /// Currently just better exception for the case of a table with projection, + /// can act differently according to the setting. + if (table->hasProjection()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "DELETE query is not supported for table {}", + table->getStorageID().getFullTableName()); } } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 12677c422b8..128854e87ba 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -552,7 +552,11 @@ BlockIO InterpreterInsertQuery::execute() { /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. 
- if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index b63672f1757..eeb762b4d7e 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -40,7 +40,6 @@ BlockIO InterpreterRenameQuery::execute() getContext()->checkAccess(getRequiredAccess(rename.database ? RenameType::RenameDatabase : RenameType::RenameTable)); - String path = getContext()->getPath(); String current_database = getContext()->getCurrentDatabase(); /** In case of error while renaming, it is possible that only part of tables was renamed diff --git a/src/Interpreters/InterpreterRenameQuery.h b/src/Interpreters/InterpreterRenameQuery.h index 79a6c0ddef4..8d7a0332c6e 100644 --- a/src/Interpreters/InterpreterRenameQuery.h +++ b/src/Interpreters/InterpreterRenameQuery.h @@ -64,7 +64,7 @@ private: BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards); BlockIO executeToDatabase(const ASTRenameQuery & rename, const RenameDescriptions & descriptions); - enum class RenameType + enum class RenameType : uint8_t { RenameTable, RenameDatabase diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index efc37fd76af..e72cf670f69 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -657,7 +657,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), metadata_snapshot, - storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context), + storage->getConditionEstimatorByPredicate(storage_snapshot, nullptr, context), queried_columns, supported_prewhere_columns, log}; @@ -2499,10 +2499,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc max_block_size = std::max(1, max_block_limited); max_threads_execute_query = max_streams = 1; } + if (local_limits.local_limits.size_limits.max_rows != 0) { if (max_block_limited < local_limits.local_limits.size_limits.max_rows) query_info.limit = max_block_limited; + else if (local_limits.local_limits.size_limits.max_rows < std::numeric_limits::max()) /// Ask to read just enough rows to make the max_rows limit effective (so it has a chance to be triggered). 
+ query_info.limit = 1 + local_limits.local_limits.size_limits.max_rows; } else { diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index c307e457649..e89a1e5febf 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -200,7 +200,7 @@ private: /// Check if we can limit block size to read based on LIMIT clause UInt64 maxBlockSizeByLimit() const; - enum class Modificator + enum class Modificator : uint8_t { ROLLUP = 0, CUBE = 1, diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 539d7a59f6f..d4af111eec0 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include @@ -38,22 +40,47 @@ namespace ErrorCodes namespace { -ASTPtr normalizeAndValidateQuery(const ASTPtr & query) +ASTPtr normalizeAndValidateQuery(const ASTPtr & query, const Names & column_names) { + ASTPtr result_query; + if (query->as() || query->as()) - { - return query; - } + result_query = query; else if (auto * subquery = query->as()) - { - return subquery->children[0]; - } + result_query = subquery->children[0]; else - { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected ASTSelectWithUnionQuery or ASTSelectQuery. Actual {}", query->formatForErrorMessage()); - } + + if (column_names.empty()) + return result_query; + + /// The initial query the VIEW references to is wrapped here with another SELECT query to allow reading only necessary columns. + auto select_query = std::make_shared(); + + auto result_table_expression_ast = std::make_shared(); + result_table_expression_ast->children.push_back(std::make_shared(std::move(result_query))); + result_table_expression_ast->subquery = result_table_expression_ast->children.back(); + + auto tables_in_select_query_element_ast = std::make_shared(); + tables_in_select_query_element_ast->children.push_back(std::move(result_table_expression_ast)); + tables_in_select_query_element_ast->table_expression = tables_in_select_query_element_ast->children.back(); + + ASTPtr tables_in_select_query_ast = std::make_shared(); + tables_in_select_query_ast->children.push_back(std::move(tables_in_select_query_element_ast)); + + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast)); + + auto projection_expression_list_ast = std::make_shared(); + projection_expression_list_ast->children.reserve(column_names.size()); + + for (const auto & column_name : column_names) + projection_expression_list_ast->children.push_back(std::make_shared(column_name)); + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(projection_expression_list_ast)); + + return select_query; } ContextMutablePtr buildContext(const ContextPtr & context, const SelectQueryOptions & select_query_options) @@ -125,8 +152,9 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( const ASTPtr & query_, const ContextPtr & context_, - const SelectQueryOptions & select_query_options_) - : query(normalizeAndValidateQuery(query_)) + const SelectQueryOptions & select_query_options_, + const Names & column_names) + : query(normalizeAndValidateQuery(query_, column_names)) , context(buildContext(context_, select_query_options_)) , select_query_options(select_query_options_) , 
query_tree(buildQueryTreeAndRunPasses(query, select_query_options, context, nullptr /*storage*/)) @@ -138,8 +166,9 @@ InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( const ASTPtr & query_, const ContextPtr & context_, const StoragePtr & storage_, - const SelectQueryOptions & select_query_options_) - : query(normalizeAndValidateQuery(query_)) + const SelectQueryOptions & select_query_options_, + const Names & column_names) + : query(normalizeAndValidateQuery(query_, column_names)) , context(buildContext(context_, select_query_options_)) , select_query_options(select_query_options_) , query_tree(buildQueryTreeAndRunPasses(query, select_query_options, context, storage_)) diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h index 2ad7e6a50f3..73c524cbe28 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.h +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -16,7 +16,8 @@ public: /// Initialize interpreter with query AST InterpreterSelectQueryAnalyzer(const ASTPtr & query_, const ContextPtr & context_, - const SelectQueryOptions & select_query_options_); + const SelectQueryOptions & select_query_options_, + const Names & column_names = {}); /** Initialize interpreter with query AST and storage. * After query tree is built left most table expression is replaced with table node that @@ -25,7 +26,8 @@ public: InterpreterSelectQueryAnalyzer(const ASTPtr & query_, const ContextPtr & context_, const StoragePtr & storage_, - const SelectQueryOptions & select_query_options_); + const SelectQueryOptions & select_query_options_, + const Names & column_names = {}); /** Initialize interpreter with query tree. * No query tree passes are applied. diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 498030a1552..d3526941b33 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -51,11 +51,12 @@ #include #include #include -#include #include -#include +#include +#include +#include +#include #include -#include #include #include #include @@ -500,17 +501,17 @@ BlockIO InterpreterSystemQuery::execute() StorageFile::getSchemaCache(getContext()).clear(); #if USE_AWS_S3 if (caches_to_drop.contains("S3")) - StorageS3::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageS3Configuration::type_name).clear(); #endif #if USE_HDFS if (caches_to_drop.contains("HDFS")) - StorageHDFS::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageHDFSConfiguration::type_name).clear(); #endif if (caches_to_drop.contains("URL")) StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlob::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageAzureConfiguration::type_name).clear(); #endif break; } diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index d31ace758c4..13872fbe3f5 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -33,7 +33,6 @@ BlockIO InterpreterTransactionControlQuery::execute() case ASTTransactionControl::SET_SNAPSHOT: return executeSetSnapshot(session_context, tcl.snapshot); } - UNREACHABLE(); } BlockIO 
InterpreterTransactionControlQuery::executeBegin(ContextMutablePtr session_context) diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index c344732a262..1327a2ef388 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 046d0b4fc10..21c773ee1d7 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -119,9 +119,9 @@ public: return result; } - inline size_t getAllocatedSize() const { return allocated_size; } + size_t getAllocatedSize() const { return allocated_size; } - inline size_t getPageSize() const { return page_size; } + size_t getPageSize() const { return page_size; } ~PageArena() { @@ -177,10 +177,10 @@ private: { } - inline void * base() const { return pages_base; } - inline size_t pagesSize() const { return pages_size; } - inline size_t pageSize() const { return page_size; } - inline size_t blockSize() const { return pages_size * page_size; } + void * base() const { return pages_base; } + size_t pagesSize() const { return pages_size; } + size_t pageSize() const { return page_size; } + size_t blockSize() const { return pages_size * page_size; } private: void * pages_base; @@ -298,7 +298,7 @@ public: return true; } - inline size_t allocatedSize() const + size_t allocatedSize() const { size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); size_t code_size = ex_page_arena.getAllocatedSize(); diff --git a/src/Interpreters/JIT/CHJIT.h b/src/Interpreters/JIT/CHJIT.h index fc883802426..89d446fd3b3 100644 --- a/src/Interpreters/JIT/CHJIT.h +++ b/src/Interpreters/JIT/CHJIT.h @@ -85,7 +85,7 @@ public: /** Total compiled code size for module that are currently valid. 
*/ - inline size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } + size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } private: diff --git a/src/Interpreters/JIT/CompileDAG.h b/src/Interpreters/JIT/CompileDAG.h index 77a02230f55..8db4ac5e110 100644 --- a/src/Interpreters/JIT/CompileDAG.h +++ b/src/Interpreters/JIT/CompileDAG.h @@ -33,7 +33,7 @@ class CompileDAG { public: - enum class CompileType + enum class CompileType : uint8_t { INPUT = 0, CONSTANT = 1, @@ -65,17 +65,17 @@ public: nodes.emplace_back(std::move(node)); } - inline size_t getNodesCount() const { return nodes.size(); } - inline size_t getInputNodesCount() const { return input_nodes_count; } + size_t getNodesCount() const { return nodes.size(); } + size_t getInputNodesCount() const { return input_nodes_count; } - inline Node & operator[](size_t index) { return nodes[index]; } - inline const Node & operator[](size_t index) const { return nodes[index]; } + Node & operator[](size_t index) { return nodes[index]; } + const Node & operator[](size_t index) const { return nodes[index]; } - inline Node & front() { return nodes.front(); } - inline const Node & front() const { return nodes.front(); } + Node & front() { return nodes.front(); } + const Node & front() const { return nodes.front(); } - inline Node & back() { return nodes.back(); } - inline const Node & back() const { return nodes.back(); } + Node & back() { return nodes.back(); } + const Node & back() const { return nodes.back(); } private: std::vector nodes; diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index 31d3920ccfd..9ab710ae537 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -215,7 +215,7 @@ static void compileCreateAggregateStatesFunctions(llvm::Module & module, const s b.CreateRetVoid(); } -enum class AddIntoAggregateStatesPlacesArgumentType +enum class AddIntoAggregateStatesPlacesArgumentType : uint8_t { SinglePlace, MultiplePlaces, diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 5cda4c982b4..6a3a181ed26 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 0aee96ee9c4..1788c9aca48 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -162,7 +162,7 @@ static ColumnPtr tryConvertColumnToNullable(ColumnPtr col) return col_lc.cloneNullable(); } } - else if (const ColumnConst * col_const = checkAndGetColumn(*col)) + else if (const ColumnConst * col_const = checkAndGetColumn(&*col)) { const auto & nested = col_const->getDataColumnPtr(); if (nested->isNullable() || nested->canBeInsideNullable()) @@ -232,7 +232,7 @@ void removeColumnNullability(ColumnWithTypeAndName & column) if (column.column && column.column->isNullable()) { column.column = column.column->convertToFullColumnIfConst(); - const auto * nullable_col = checkAndGetColumn(*column.column); + const auto * nullable_col = checkAndGetColumn(column.column.get()); if (!nullable_col) { throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' is expected to be nullable", column.dumpStructure()); @@ -258,11 +258,11 @@ void changeColumnRepresentation(const ColumnPtr & src_column, 
ColumnPtr & dst_co if (nullable_src && !nullable_dst) { - const auto * nullable = checkAndGetColumn(*src_column); + const auto & nullable = checkAndGetColumn(*src_column); if (change_lowcard) - dst_column = changeLowCardinality(nullable->getNestedColumnPtr(), dst_column); + dst_column = changeLowCardinality(nullable.getNestedColumnPtr(), dst_column); else - dst_column = nullable->getNestedColumnPtr(); + dst_column = nullable.getNestedColumnPtr(); } else if (!nullable_src && nullable_dst) { @@ -275,7 +275,7 @@ void changeColumnRepresentation(const ColumnPtr & src_column, ColumnPtr & dst_co { if (change_lowcard) { - if (const auto * nullable = checkAndGetColumn(*src_column)) + if (const auto * nullable = checkAndGetColumn(&*src_column)) { dst_column = makeNullable(changeLowCardinality(nullable->getNestedColumnPtr(), dst_not_null)); assert_cast(*dst_column->assumeMutable()).applyNullMap(nullable->getNullMapColumn()); @@ -291,7 +291,7 @@ void changeColumnRepresentation(const ColumnPtr & src_column, ColumnPtr & dst_co ColumnPtr emptyNotNullableClone(const ColumnPtr & column) { if (column->isNullable()) - return checkAndGetColumn(*column)->getNestedColumnPtr()->cloneEmpty(); + return checkAndGetColumn(*column).getNestedColumnPtr()->cloneEmpty(); return column->cloneEmpty(); } @@ -374,10 +374,10 @@ ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_nam key_columns[i] = block_keys.getByName(column_name).column.get(); /// We will join only keys, where all components are not NULL. - if (const auto * nullable = checkAndGetColumn(*key_columns[i])) + if (const auto * nullable = checkAndGetColumn(&*key_columns[i])) key_columns[i] = &nullable->getNestedColumn(); - if (const auto * sparse = checkAndGetColumn(*key_columns[i])) + if (const auto * sparse = checkAndGetColumn(&*key_columns[i])) key_columns[i] = &sparse->getValuesColumn(); } @@ -490,7 +490,7 @@ JoinMask getColumnAsMask(const Block & block, const String & column_name) if (isNothing(col_type)) return JoinMask(false, block.rows()); - if (const auto * const_cond = checkAndGetColumn(*src_col.column)) + if (const auto * const_cond = checkAndGetColumn(&*src_col.column)) { return JoinMask(const_cond->getBool(0), block.rows()); } diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h index ff48f34d82c..f15ee2c2fb2 100644 --- a/src/Interpreters/JoinUtils.h +++ b/src/Interpreters/JoinUtils.h @@ -49,7 +49,7 @@ public: return nullptr; } - inline bool isRowFiltered(size_t row) const + bool isRowFiltered(size_t row) const { return !assert_cast(*column).getData()[row]; } diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 5b549a19083..457ed3ef4a6 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -310,7 +310,7 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se auto settings = context->getSettingsRef(); MultiEnum join_algorithm = settings.join_algorithm; bool try_use_direct_join = join_algorithm.isSet(JoinAlgorithm::DIRECT) || join_algorithm.isSet(JoinAlgorithm::DEFAULT); - auto table_join = std::make_shared(settings, context->getGlobalTemporaryVolume()); + auto table_join = std::make_shared(settings, context->getGlobalTemporaryVolume(), context->getTempDataOnDisk()); const ASTTablesInSelectQueryElement * ast_join = select_query_.join(); const auto & table_to_join = ast_join->table_expression->as(); diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index d5fb0208d45..b374175d466 100644 --- 
a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -80,8 +80,8 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if constexpr (has_left_nulls && has_right_nulls) { - const auto * left_nullable = checkAndGetColumn(left_column); - const auto * right_nullable = checkAndGetColumn(right_column); + const auto * left_nullable = checkAndGetColumn(&left_column); + const auto * right_nullable = checkAndGetColumn(&right_column); if (left_nullable && right_nullable) { @@ -99,7 +99,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if constexpr (has_left_nulls) { - if (const auto * left_nullable = checkAndGetColumn(left_column)) + if (const auto * left_nullable = checkAndGetColumn(&left_column)) { if (left_column.isNullAt(lhs_pos)) return null_direction_hint; @@ -109,7 +109,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if constexpr (has_right_nulls) { - if (const auto * right_nullable = checkAndGetColumn(right_column)) + if (const auto * right_nullable = checkAndGetColumn(&right_column)) { if (right_column.isNullAt(rhs_pos)) return -null_direction_hint; @@ -604,7 +604,7 @@ void MergeJoin::mergeInMemoryRightBlocks() /// TODO: there should be no split keys by blocks for RIGHT|FULL JOIN builder.addTransform(std::make_shared( - builder.getHeader(), right_sort_description, max_rows_in_right_block, 0, false, 0, 0, 0, nullptr, 0)); + builder.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, false, 0, 0, 0, nullptr, 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); @@ -700,8 +700,10 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) /// We need to check type of masks before `addConditionJoinColumn`, because it assumes that types is correct JoinCommon::checkTypesOfMasks(block, mask_column_name_left, right_sample_block, mask_column_name_right); - /// Add auxiliary column, will be removed after joining - addConditionJoinColumn(block, JoinTableSide::Left); + if (!not_processed) + /// Add an auxiliary column, which will be removed after joining + /// We do not need to add it twice when we are continuing to process the block from the previous iteration + addConditionJoinColumn(block, JoinTableSide::Left); /// Types of keys can be checked only after `checkTypesOfKeys` JoinCommon::checkTypesOfKeys(block, key_names_left, right_table_keys, key_names_right); diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 4486c134d51..a93ab3e067f 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -15,7 +15,7 @@ class TableJoin; class MergeJoinCursor; struct MergeJoinEqualRange; class RowBitmaps; -enum class JoinTableSide; +enum class JoinTableSide : uint8_t; class MergeJoin : public IJoin { diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h deleted file mode 100644 index e1df45ed36b..00000000000 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ /dev/null @@ -1,157 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -using Monotonicity = IFunctionBase::Monotonicity; - -/// Checks from bottom to top if function composition is monotonous -class MonotonicityCheckMatcher -{ -public: - struct Data - { - const TablesWithColumns & tables; - ContextPtr context; - const 
std::unordered_set & group_by_function_hashes; - - Monotonicity monotonicity = { .is_monotonic = true, .is_positive = true, .is_always_monotonic = true }; - - ASTIdentifier * identifier = nullptr; - DataTypePtr arg_data_type = {}; - - void reject() { monotonicity.is_monotonic = false; } - bool isRejected() const { return !monotonicity.is_monotonic; } - - bool canOptimize(const ASTFunction & ast_function) const - { - /// if GROUP BY contains the same function ORDER BY shouldn't be optimized - const auto hash = ast_function.getTreeHash(/*ignore_aliases=*/ true); - const auto key = toString(hash); - if (group_by_function_hashes.contains(key)) - return false; - - /// if ORDER BY contains aggregate function or window functions, it - /// shouldn't be optimized - if (ast_function.is_window_function - || AggregateUtils::isAggregateFunction(ast_function)) - { - return false; - } - - return true; - } - - bool extractIdentifierAndType(const ASTFunction & ast_function) - { - if (identifier) - return true; - - identifier = ast_function.arguments->children[0]->as(); - if (!identifier) - return false; - - auto pos = IdentifierSemantic::getMembership(*identifier); - if (!pos) - pos = IdentifierSemantic::chooseTableColumnMatch(*identifier, tables, true); - if (!pos) - return false; - - /// It is possible that tables list is empty. - /// IdentifierSemantic get the position from AST, and it can be not valid to use it. - /// Example is re-analysing a part of AST for storage Merge, see 02147_order_by_optimizations.sql - if (*pos >= tables.size()) - return false; - - if (auto data_type_and_name = tables[*pos].columns.tryGetByName(identifier->shortName())) - { - arg_data_type = data_type_and_name->type; - return true; - } - - return false; - } - }; - - static void visit(const ASTPtr & ast, Data & data) - { - if (const auto * ast_function = ast->as()) - visit(*ast_function, data); - } - - static void visit(const ASTFunction & ast_function, Data & data) - { - if (data.isRejected()) - return; - - /// TODO: monotonicity for functions of several arguments - if (!ast_function.arguments || ast_function.arguments->children.size() != 1) - { - data.reject(); - return; - } - - if (!data.canOptimize(ast_function)) - { - data.reject(); - return; - } - - const auto & function = FunctionFactory::instance().tryGet(ast_function.name, data.context); - if (!function) - { - data.reject(); - return; - } - - /// First time extract the most enclosed identifier and its data type - if (!data.arg_data_type && !data.extractIdentifierAndType(ast_function)) - { - data.reject(); - return; - } - - ColumnsWithTypeAndName args; - args.emplace_back(data.arg_data_type, "tmp"); - auto function_base = function->build(args); - - if (function_base && function_base->hasInformationAboutMonotonicity()) - { - bool is_positive = data.monotonicity.is_positive; - data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, Field(), Field()); - - if (!is_positive) - data.monotonicity.is_positive = !data.monotonicity.is_positive; - data.arg_data_type = function_base->getResultType(); - } - else - data.reject(); - } - - static bool needChildVisit(const ASTPtr & parent, const ASTPtr &) - { - /// Currently we check monotonicity only for single-argument functions. - /// Although, multi-argument functions with all but one constant arguments can also be monotonic. 
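/// [Editorial sketch, not part of the diff: the visitor removed above folded monotonicity from the
/// innermost identifier outwards so that ORDER BY f(g(x)) could be treated as ORDER BY x. The
/// composition rule it relied on can be stated standalone: a chain stays monotonic only while every
/// step is monotonic, and the overall direction flips each time a decreasing step is applied.
/// A self-contained model with hypothetical names:]
struct ChainMonotonicity
{
    bool is_monotonic = true;
    bool is_positive = true;   /// true = non-decreasing, false = non-increasing
};

ChainMonotonicity composeMonotonicity(ChainMonotonicity outer, ChainMonotonicity inner)
{
    ChainMonotonicity result;
    result.is_monotonic = outer.is_monotonic && inner.is_monotonic;
    /// Two decreasing steps cancel out, e.g. negate(negate(x)) is increasing again.
    result.is_positive = (outer.is_positive == inner.is_positive);
    return result;
}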
- if (const auto * func = typeid_cast(parent.get())) - return func->arguments->children.size() < 2; - - return true; - } -}; - -using MonotonicityCheckVisitor = ConstInDepthNodeVisitor; - -} diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 5fc7c019bb4..4f6c1c5f18b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1321,7 +1321,7 @@ void MutationsInterpreter::validate() if (nondeterministic_func_data.nondeterministic_function_name) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions. " + "The source storage is replicated so ALTER UPDATE/ALTER DELETE statements must use only deterministic functions. " "Function '{}' is non-deterministic", *nondeterministic_func_data.nondeterministic_function_name); } } @@ -1417,8 +1417,7 @@ bool MutationsInterpreter::isAffectingAllColumns() const void MutationsInterpreter::MutationKind::set(const MutationKindEnum & kind) { - if (mutation_kind < kind) - mutation_kind = kind; + mutation_kind = std::max(mutation_kind, kind); } } diff --git a/src/Interpreters/NullableUtils.cpp b/src/Interpreters/NullableUtils.cpp index ce681b1d569..fa0ddae8c90 100644 --- a/src/Interpreters/NullableUtils.cpp +++ b/src/Interpreters/NullableUtils.cpp @@ -12,7 +12,7 @@ ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullM if (key_columns.size() == 1) { auto & column = key_columns[0]; - if (const auto * column_nullable = checkAndGetColumn(*column)) + if (const auto * column_nullable = checkAndGetColumn(&*column)) { null_map_holder = column_nullable->getNullMapColumnPtr(); null_map = &column_nullable->getNullMapData(); @@ -23,7 +23,7 @@ ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullM { for (auto & column : key_columns) { - if (const auto * column_nullable = checkAndGetColumn(*column)) + if (const auto * column_nullable = checkAndGetColumn(&*column)) { column = &column_nullable->getNestedColumn(); diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index aa11749f8a6..8d777c640c7 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -20,11 +20,11 @@ ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() auto span_kind_type = std::make_shared( DataTypeEnum8::Values { - {"INTERNAL", static_cast(OpenTelemetry::INTERNAL)}, - {"SERVER", static_cast(OpenTelemetry::SERVER)}, - {"CLIENT", static_cast(OpenTelemetry::CLIENT)}, - {"PRODUCER", static_cast(OpenTelemetry::PRODUCER)}, - {"CONSUMER", static_cast(OpenTelemetry::CONSUMER)} + {"INTERNAL", static_cast(OpenTelemetry::SpanKind::INTERNAL)}, + {"SERVER", static_cast(OpenTelemetry::SpanKind::SERVER)}, + {"CLIENT", static_cast(OpenTelemetry::SpanKind::CLIENT)}, + {"PRODUCER", static_cast(OpenTelemetry::SpanKind::PRODUCER)}, + {"CONSUMER", static_cast(OpenTelemetry::SpanKind::CONSUMER)} } ); diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index f3504f3f403..20451fb20ad 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -27,6 +27,11 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v value = literal->value.get(); return true; } + if (literal->value.getType() == Field::Types::Null) + { + 
value = false; + return true; + } } /// cast of numeric constant in condition to UInt8 diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 66f933f1afa..db339375231 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -101,7 +101,7 @@ ColumnsDescription PartLogElement::getColumnsDescription() "Can have one of the following values: " "NewPart — Inserting of a new data part, " "MergeParts — Merging of data parts, " - "DownloadParts — Downloading a data part, " + "DownloadPart — Downloading a data part, " "RemovePart — Removing or detaching a data part using DETACH PARTITION, " "MutatePart — Mutating of a data part, " "MovePart — Moving the data part from the one disk to another one."}, diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 19f9b9afdda..accb73e12df 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -143,14 +143,14 @@ protected: /// Container of PipelineExecutors to be cancelled when a cancelQuery is received std::unordered_map executors; - enum QueryStreamsStatus + enum class QueryStreamsStatus : uint8_t { NotInitialized, Initialized, Released }; - QueryStreamsStatus query_streams_status{NotInitialized}; + QueryStreamsStatus query_streams_status{QueryStreamsStatus::NotInitialized}; ProcessListForUser * user_process_list = nullptr; diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index f47635a3c3f..a8639906aad 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/RewriteCountDistinctVisitor.cpp b/src/Interpreters/RewriteCountDistinctVisitor.cpp index cf28d8abb87..2a898f8789b 100644 --- a/src/Interpreters/RewriteCountDistinctVisitor.cpp +++ b/src/Interpreters/RewriteCountDistinctVisitor.cpp @@ -42,7 +42,6 @@ void RewriteCountDistinctFunctionMatcher::visit(ASTPtr & ast, Data & /*data*/) auto cloned_select_query = selectq->clone(); expr_list->children[0] = makeASTFunction("count"); - auto table_name = table_expr->database_and_table_name->as()->name(); table_expr->children.clear(); table_expr->children.emplace_back(std::make_shared()); table_expr->database_and_table_name = nullptr; diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/S3QueueLog.h index a7adbf28b3d..19e69c39247 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/S3QueueLog.h @@ -20,7 +20,7 @@ struct S3QueueLogElement std::string file_name; size_t rows_processed = 0; - enum class S3QueueStatus + enum class S3QueueStatus : uint8_t { Processed, Failed, diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 4f8b03a5eaa..315202cc01d 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -251,7 +251,7 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr size_t max_part_count_for_partition = 0; size_t number_of_databases = 0; - for (auto [db_name, _] : databases) + for (const auto & [db_name, _] : databases) if (db_name != DatabaseCatalog::TEMPORARY_DATABASE) ++number_of_databases; /// filter out the internal database for temporary tables, system table "system.databases" behaves the same way diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index d1520c92dbc..f33418f45ac 100644 --- a/src/Interpreters/Set.cpp +++ 
b/src/Interpreters/Set.cpp @@ -653,7 +653,7 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, /// Given left_lower >= left_point, right_lower >= right_point, find if there may be a match in between left_lower and right_lower. if (left_lower + 1 < right_lower) { - /// There is an point in between: left_lower + 1 + /// There is a point in between: left_lower + 1 return {true, true}; } else if (left_lower + 1 == right_lower) diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index 0fb2e5189d4..c600d096160 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -41,8 +41,6 @@ size_t SetVariantsTemplate::getTotalRowCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } - - UNREACHABLE(); } template @@ -57,8 +55,6 @@ size_t SetVariantsTemplate::getTotalByteCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } - - UNREACHABLE(); } template @@ -74,7 +70,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose for (const auto & col : key_columns) { - if (const auto * nullable = checkAndGetColumn(*col)) + if (const auto * nullable = checkAndGetColumn(&*col)) { nested_key_columns.push_back(&nullable->getNestedColumn()); has_nullable_key = true; diff --git a/src/Interpreters/SetVariants.h b/src/Interpreters/SetVariants.h index ff527102080..354e04fa855 100644 --- a/src/Interpreters/SetVariants.h +++ b/src/Interpreters/SetVariants.h @@ -80,7 +80,7 @@ protected: for (const auto & col : key_columns) { - if (const auto * nullable = checkAndGetColumn(*col)) + if (const auto * nullable = checkAndGetColumn(&*col)) { actual_columns.push_back(&nullable->getNestedColumn()); null_maps.push_back(&nullable->getNullMapColumn()); @@ -254,7 +254,7 @@ struct SetVariantsTemplate: public Variant APPLY_FOR_SET_VARIANTS(M) #undef M - enum class Type + enum class Type : uint8_t { EMPTY, diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index e9fa224df11..6191eb73fd4 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -103,10 +103,12 @@ bool forAllKeys(OnExpr & expressions, Func callback) } -TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) +TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_, TemporaryDataOnDiskScopePtr tmp_data_) : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode}) , default_max_bytes(settings.default_max_bytes_in_join) , join_use_nulls(settings.join_use_nulls) + , cross_join_min_rows_to_compress(settings.cross_join_min_rows_to_compress) + , cross_join_min_bytes_to_compress(settings.cross_join_min_bytes_to_compress) , max_joined_block_rows(settings.max_joined_block_size_rows) , join_algorithm(settings.join_algorithm) , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks) @@ -115,6 +117,7 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) , temporary_files_codec(settings.temporary_files_codec) , max_memory_usage(settings.max_memory_usage) , tmp_volume(tmp_volume_) + , tmp_data(tmp_data_) { } diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 31ac58578d5..8e83233e54c 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,8 @@ private: SizeLimits size_limits; const size_t default_max_bytes = 0; const 
bool join_use_nulls = false; + const UInt64 cross_join_min_rows_to_compress = 1000; + const UInt64 cross_join_min_bytes_to_compress = 10000; const size_t max_joined_block_rows = 0; std::vector join_algorithm; const size_t partial_merge_join_rows_in_right_blocks = 0; @@ -186,6 +189,8 @@ private: VolumePtr tmp_volume; + TemporaryDataOnDiskScopePtr tmp_data; + std::shared_ptr right_storage_join; std::shared_ptr right_kv_storage; @@ -231,7 +236,7 @@ private: public: TableJoin() = default; - TableJoin(const Settings & settings, VolumePtr tmp_volume_); + TableJoin(const Settings & settings, VolumePtr tmp_volume_, TemporaryDataOnDiskScopePtr tmp_data_); /// for StorageJoin TableJoin(SizeLimits limits, bool use_nulls, JoinKind kind, JoinStrictness strictness, @@ -257,6 +262,8 @@ public: VolumePtr getGlobalTemporaryVolume() { return tmp_volume; } + TemporaryDataOnDiskScopePtr getTempDataOnDisk() { return tmp_data; } + ActionsDAGPtr createJoinedBlockActions(ContextPtr context) const; const std::vector & getEnabledJoinAlgorithms() const { return join_algorithm; } @@ -275,6 +282,10 @@ public: bool joinUseNulls() const { return join_use_nulls; } + UInt64 crossJoinMinRowsToCompress() const { return cross_join_min_rows_to_compress; } + + UInt64 crossJoinMinBytesToCompress() const { return cross_join_min_bytes_to_compress; } + bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(kind()); diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 26a78d53aab..a74b5bba2b9 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -1,12 +1,11 @@ +#include +#include #include #include -#include #include -#include #include #include -#include #include #include #include @@ -14,6 +13,7 @@ #include #include +#include "Common/Exception.h" namespace ProfileEvents { @@ -43,10 +43,10 @@ void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssiz throw Exception(ErrorCodes::LOGICAL_ERROR, "Negative temporary data size"); } - size_t new_consumprion = stat.compressed_size + compressed_delta; - if (compressed_delta > 0 && settings.max_size_on_disk && new_consumprion > settings.max_size_on_disk) + size_t new_consumption = stat.compressed_size + compressed_delta; + if (compressed_delta > 0 && settings.max_size_on_disk && new_consumption > settings.max_size_on_disk) throw Exception(ErrorCodes::TOO_MANY_ROWS_OR_BYTES, - "Limit for temporary files size exceeded (would consume {} / {} bytes)", new_consumprion, settings.max_size_on_disk); + "Limit for temporary files size exceeded (would consume {} / {} bytes)", new_consumption, settings.max_size_on_disk); stat.compressed_size += compressed_delta; stat.uncompressed_size += uncompressed_delta; @@ -224,33 +224,26 @@ struct TemporaryFileStream::OutputWriter bool finalized = false; }; -struct TemporaryFileStream::InputReader +TemporaryFileStream::Reader::Reader(const String & path, const Block & header_, size_t size) + : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) + , in_compressed_buf(in_file_buf) + , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) { - InputReader(const String & path, const Block & header_, size_t size = 0) - : in_file_buf(path, size ? 
std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) - { - LOG_TEST(getLogger("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); - } + LOG_TEST(getLogger("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); +} - explicit InputReader(const String & path, size_t size = 0) - : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) - { - LOG_TEST(getLogger("TemporaryFileStream"), "Reading from {}", path); - } +TemporaryFileStream::Reader::Reader(const String & path, size_t size) + : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) + , in_compressed_buf(in_file_buf) + , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) +{ + LOG_TEST(getLogger("TemporaryFileStream"), "Reading from {}", path); +} - Block read() - { - return in_reader.read(); - } - - ReadBufferFromFile in_file_buf; - CompressedReadBuffer in_compressed_buf; - NativeReader in_reader; -}; +Block TemporaryFileStream::Reader::read() +{ + return in_reader.read(); +} TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_) : parent(parent_) @@ -310,6 +303,12 @@ TemporaryFileStream::Stat TemporaryFileStream::finishWriting() return stat; } +TemporaryFileStream::Stat TemporaryFileStream::finishWritingAsyncSafe() +{ + std::call_once(finish_writing, [this]{ finishWriting(); }); + return stat; +} + bool TemporaryFileStream::isWriteFinished() const { assert(in_reader == nullptr || out_writer == nullptr); @@ -326,7 +325,7 @@ Block TemporaryFileStream::read() if (!in_reader) { - in_reader = std::make_unique(getPath(), header, getSize()); + in_reader = std::make_unique(getPath(), header, getSize()); } Block block = in_reader->read(); @@ -338,6 +337,17 @@ Block TemporaryFileStream::read() return block; } +std::unique_ptr TemporaryFileStream::getReadStream() +{ + if (!isWriteFinished()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has not been finished"); + + if (isEof()) + return nullptr; + + return std::make_unique(getPath(), header, getSize()); +} + void TemporaryFileStream::updateAllocAndCheck() { assert(out_writer); diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 40100a62b44..488eed70da9 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -1,7 +1,12 @@ #pragma once +#include +#include #include +#include +#include +#include #include #include #include @@ -132,12 +137,25 @@ private: /* * Data can be written into this stream and then read. - * After finish writing, call `finishWriting` and then `read` to read the data. + * After finishing writing, call `finishWriting` and then call either `read` or `getReadStream` (only one of the two) to read the data. * Account amount of data written to disk in parent scope.
*/ class TemporaryFileStream : boost::noncopyable { public: + struct Reader + { + Reader(const String & path, const Block & header_, size_t size = 0); + + explicit Reader(const String & path, size_t size = 0); + + Block read(); + + ReadBufferFromFile in_file_buf; + CompressedReadBuffer in_compressed_buf; + NativeReader in_reader; + }; + struct Stat { /// Statistics for file @@ -154,8 +172,11 @@ public: void flush(); Stat finishWriting(); + Stat finishWritingAsyncSafe(); bool isWriteFinished() const; + std::unique_ptr getReadStream(); + Block read(); String getPath() const; @@ -184,11 +205,12 @@ private: Stat stat; + std::once_flag finish_writing; + struct OutputWriter; std::unique_ptr out_writer; - struct InputReader; - std::unique_ptr in_reader; + std::unique_ptr in_reader; }; } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 2b8e8bef6d4..9ca521a4ab3 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -458,6 +458,31 @@ void ThreadStatus::resetPerformanceCountersLastUsage() taskstats->reset(); } +void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) +{ +#if !defined(SANITIZER) && !defined(__APPLE__) + /// profilers are useless without trace collector + auto context = Context::getGlobalContextInstance(); + if (!context->hasTraceCollector()) + return; + + try + { + if (global_profiler_real_time_period > 0) + query_profiler_real = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_real_time_period)); + + if (global_profiler_cpu_time_period > 0) + query_profiler_cpu = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_cpu_time_period)); + } + catch (...) 
+ { + tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); + } +#endif +} + void ThreadStatus::initQueryProfiler() { if (internal_thread) diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 01bedf34f15..2c56eb79089 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -53,6 +54,18 @@ ColumnsDescription TraceLogElement::getColumnsDescription() }; } +NamesAndAliases TraceLogElement::getNamesAndAliases() +{ + String build_id_hex; +#if defined(__ELF__) && !defined(OS_FREEBSD) + build_id_hex = SymbolIndex::instance().getBuildIDHex(); +#endif + return + { + {"build_id", std::make_shared(), "\'" + build_id_hex + "\'"}, + }; +} + void TraceLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 418b8d546a0..c4314cfd7b0 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -39,7 +39,7 @@ struct TraceLogElement static std::string name() { return "TraceLog"; } static ColumnsDescription getColumnsDescription(); - static NamesAndAliases getNamesAndAliases() { return {}; } + static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; }; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 03df7283992..c21c4d34fa8 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index 8258412f1a6..ec4b029eee9 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -164,6 +164,12 @@ public: void pushNotIn(CNFQuery::AtomicFormula & atom); +/// Reduces CNF groups by removing mutually exclusive atoms +/// found across groups, in case other atoms are identical. +/// Might require multiple passes to complete reduction. +/// +/// Example: +/// (x OR y) AND (x OR !y) -> x template TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) { @@ -175,10 +181,19 @@ TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) bool inserted = false; for (const auto & atom : group) { - copy.erase(atom); using AtomType = std::decay_t; AtomType negative_atom(atom); negative_atom.negative = !atom.negative; + + // Skipping erase-insert for mutually exclusive atoms within + // a single group, since it would not insert the negative atom, which + // would break the logic of this rule + if (copy.contains(negative_atom)) + { + continue; + } + + copy.erase(atom); copy.insert(negative_atom); if (groups.contains(copy)) @@ -209,6 +224,10 @@ bool isCNFGroupSubset(const TOrGroup & left, const TOrGroup & right) return true; } +/// Removes CNF groups if subset group is found in CNF.
+/// +/// Example: +/// (x OR y) AND (x) -> x template TAndGroup filterCNFSubsets(const TAndGroup & groups) { diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 2d0c9ba1a37..c331c8640d6 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -174,10 +173,9 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) const auto & erase_position = group_exprs.begin() + i; group_exprs.erase(erase_position); const auto & insert_position = group_exprs.begin() + i; - std::remove_copy_if( - std::begin(args_ast->children), std::end(args_ast->children), - std::inserter(group_exprs, insert_position), is_literal - ); + (void)std::remove_copy_if( + std::begin(args_ast->children), std::end(args_ast->children), + std::inserter(group_exprs, insert_position), is_literal); } else if (is_literal(group_exprs[i])) { diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 50c28fbc8b2..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,6 +1188,33 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } + /// Check for dynamic subcolumns in unknown required columns. + if (!unknown_required_source_columns.empty()) + { + for (const NameAndTypePair & pair : source_columns_ordinary) + { + if (!pair.type->hasDynamicSubcolumns()) + continue; + + for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) + { + auto [column_name, dynamic_subcolumn_name] = Nested::splitName(*it); + + if (column_name == pair.name) + { + if (auto dynamic_subcolumn_type = pair.type->tryGetSubcolumnType(dynamic_subcolumn_name)) + { + source_columns.emplace_back(*it, dynamic_subcolumn_type); + it = unknown_required_source_columns.erase(it); + continue; + } + } + + ++it; + } + } + } + if (!unknown_required_source_columns.empty()) { constexpr auto format_string = "Missing columns: {} while processing query: '{}', required columns:{}{}"; diff --git a/src/Interpreters/WhereConstraintsOptimizer.cpp b/src/Interpreters/WhereConstraintsOptimizer.cpp index 5a0102f1ee7..456cf76b987 100644 --- a/src/Interpreters/WhereConstraintsOptimizer.cpp +++ b/src/Interpreters/WhereConstraintsOptimizer.cpp @@ -27,7 +27,7 @@ WhereConstraintsOptimizer::WhereConstraintsOptimizer( namespace { -enum class MatchState +enum class MatchState : uint8_t { FULL_MATCH, /// a = b NOT_MATCH, /// a = not b @@ -91,6 +91,22 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis return false; } +bool checkIfGroupAlwaysTrueAtoms(const CNFQuery::OrGroup & group) +{ + /// Filters out groups containing mutually exclusive atoms, + /// since these groups are always True + + for (const auto & atom : group) + { + auto negated(atom); + negated.negative = !atom.negative; + if (group.contains(negated)) + { + return true; + } + } + return false; +} bool checkIfAtomAlwaysFalseFullMatch(const CNFQuery::AtomicFormula & atom, const ConstraintsDescription & constraints_description) { @@ -158,7 +174,8 @@ void WhereConstraintsOptimizer::perform() .filterAlwaysTrueGroups([&compare_graph, this](const auto & group) { /// remove always true groups from CNF - return !checkIfGroupAlwaysTrueFullMatch(group,
metadata_snapshot->getConstraints()) + && !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group); }) .filterAlwaysFalseAtoms([&compare_graph, this](const auto & atom) { diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index 05269c9d2c3..c26e4517c9a 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -30,8 +30,8 @@ struct WindowFunctionDescription struct WindowFrame { - enum class FrameType { ROWS, GROUPS, RANGE }; - enum class BoundaryType { Unbounded, Current, Offset }; + enum class FrameType : uint8_t { ROWS, GROUPS, RANGE }; + enum class BoundaryType : uint8_t { Unbounded, Current, Offset }; // This flag signifies that the frame properties were not set explicitly by // user, but the fields of this structure still have to contain proper values diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 25085ff4823..9363e3d83eb 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -504,7 +505,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID else if (const DataTypeVariant * type_variant = typeid_cast(&type)) { /// If we have type hint and Variant contains such type, no need to convert field. - if (from_type_hint && type_variant->tryGetVariantDiscriminator(*from_type_hint)) + if (from_type_hint && type_variant->tryGetVariantDiscriminator(from_type_hint->getName())) return src; /// Create temporary column and check if we can insert this field to the variant. @@ -513,6 +514,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (col->tryInsert(src)) return src; } + else if (isDynamic(type)) + { + /// We can insert any field to Dynamic column. + return src; + } /// Conversion from string by parsing. 
if (src.getType() == Field::Types::String) diff --git a/src/Interpreters/examples/hash_map_string_3.cpp b/src/Interpreters/examples/hash_map_string_3.cpp index 57e36bed545..44ee3542bd9 100644 --- a/src/Interpreters/examples/hash_map_string_3.cpp +++ b/src/Interpreters/examples/hash_map_string_3.cpp @@ -96,7 +96,7 @@ inline bool operator==(StringRef_CompareAlwaysTrue, StringRef_CompareAlwaysTrue) struct FastHash64 { - static inline uint64_t mix(uint64_t h) + static uint64_t mix(uint64_t h) { h ^= h >> 23; h *= 0x2127599bf4325c37ULL; diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index fbcb57b6125..e372f036073 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -65,7 +65,7 @@ bool isSupportedAlterTypeForOnClusterDDLQuery(int type) BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params) { - OpenTelemetry::SpanHolder span(__FUNCTION__, OpenTelemetry::PRODUCER); + OpenTelemetry::SpanHolder span(__FUNCTION__, OpenTelemetry::SpanKind::PRODUCER); if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported"); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c785de61a18..9c5436517ab 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -103,7 +103,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; extern const int QUERY_WAS_CANCELLED; - extern const int INCORRECT_DATA; extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; extern const int INCORRECT_QUERY; @@ -783,6 +782,7 @@ static std::tuple executeQueryImpl( catch (const Exception & e) { if (e.code() == ErrorCodes::SYNTAX_ERROR) + /// Don't print the original query text because it may contain sensitive data. throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent AST formatting: the query:\n{}\ncannot parse.", formatted1); @@ -807,12 +807,14 @@ static std::tuple executeQueryImpl( bool is_create_parameterized_view = false; if (const auto * create_query = ast->as()) + { is_create_parameterized_view = create_query->isParameterizedView(); + } else if (const auto * explain_query = ast->as()) { - assert(!explain_query->children.empty()); - if (const auto * create_of_explain_query = explain_query->children[0]->as()) - is_create_parameterized_view = create_of_explain_query->isParameterizedView(); + if (!explain_query->children.empty()) + if (const auto * create_of_explain_query = explain_query->children[0]->as()) + is_create_parameterized_view = create_of_explain_query->isParameterizedView(); } /// Replace ASTQueryParameter with ASTLiteral for prepared statements. @@ -1091,6 +1093,15 @@ static std::tuple executeQueryImpl( && (ast->as() || ast->as()); QueryCache::Usage query_cache_usage = QueryCache::Usage::None; + /// If the query runs with "use_query_cache = 1", we first probe if the query cache already contains the query result (if yes: + /// return result from cache). If it doesn't, we execute the query normally and write the result into the query cache. Both steps use a + /// hash of the AST, the current database and the settings as cache key.
Unfortunately, the settings are in some places internally + /// modified between steps 1 and 2 (= during query execution) - this is silly but hard to forbid. As a result, the hashes no longer + /// match and the cache is rendered ineffective. Therefore make a copy of the settings and use it for steps 1 and 2. + std::optional settings_copy; + if (can_use_query_cache) + settings_copy = settings; + if (!async_insert) { /// If it is a non-internal SELECT, and passive (read) use of the query cache is enabled, and the cache knows the query, then set @@ -1099,7 +1110,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1183,7 +1194,9 @@ static std::tuple executeQueryImpl( } if (auto * create_interpreter = typeid_cast(&*interpreter)) + { create_interpreter->setIsRestoreFromBackup(flags.distributed_backup_restore); + } { std::unique_ptr span; @@ -1222,7 +1235,7 @@ static std::tuple executeQueryImpl( && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( - ast, res.pipeline.getHeader(), + ast, context->getCurrentDatabase(), *settings_copy, res.pipeline.getHeader(), context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), @@ -1249,38 +1262,9 @@ static std::tuple executeQueryImpl( } } } - } } } - // Here we check if our our projections contain force_optimize_projection_name - if (!settings.force_optimize_projection_name.value.empty()) - { - bool found = false; - std::set projections; - { - const auto & access_info = context->getQueryAccessInfo(); - std::lock_guard lock(access_info.mutex); - projections = access_info.projections; - } - - for (const auto &projection : projections) - { - // projection value has structure like: .. - // We need to get only the projection name - size_t last_dot_pos = projection.find_last_of('.'); - std::string projection_name = (last_dot_pos != std::string::npos) ? projection.substr(last_dot_pos + 1) : projection; - if (settings.force_optimize_projection_name.value == projection_name) - { - found = true; - break; - } - } - - if (!found) - throw Exception(ErrorCodes::INCORRECT_DATA, "Projection {} is specified in setting force_optimize_projection_name but not used", - settings.force_optimize_projection_name.value); - } if (process_list_entry) { @@ -1418,7 +1402,16 @@ void executeQuery( const char * begin; const char * end; - istr.nextIfAtEnd(); + try + { + istr.nextIfAtEnd(); + } + catch (...) + { + /// If buffer contains invalid data and we failed to decompress, we still want to have some information about the query in the log. + logQuery("", context, /* internal = */ false, QueryProcessingStage::Complete); + throw; + } size_t max_query_size = context->getSettingsRef().max_query_size; @@ -1522,9 +1515,16 @@ void executeQuery( if (output_format) handle_exception_in_output_format(*output_format, format_name, context, output_format_settings); } + /// The timezone was already set before query was processed, + /// But `session_timezone` setting could be modified in the query itself, so we update the value. 
+ result_details.timezone = DateLUT::instance().getTimeZone(); throw; } + /// The timezone was already set before query was processed, + /// But `session_timezone` setting could be modified in the query itself, so we update the value. + result_details.timezone = DateLUT::instance().getTimeZone(); + auto & pipeline = streams.pipeline; std::unique_ptr compressed_buffer; diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index c009808de3f..b77fc5aee1e 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 27c364073ae..3529863a623 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -40,7 +40,7 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio if (!settings.allow_experimental_object_type) { - if (data_type.hasDynamicSubcolumns()) + if (data_type.hasDynamicSubcolumnsDeprecated()) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -107,6 +107,18 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio } } } + + if (!settings.allow_experimental_dynamic_type) + { + if (data_type.hasDynamicSubcolumns()) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental Dynamic type is not allowed. " + "Set setting allow_experimental_dynamic_type = 1 in order to allow it", + data_type.getName()); + } + } }; validate_callback(*type_to_check); diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index ffb59bfa457..e2d2bc97ff7 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -21,6 +21,7 @@ struct DataTypeValidationSettings , allow_experimental_variant_type(settings.allow_experimental_variant_type) , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) + , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) { } @@ -30,6 +31,7 @@ struct DataTypeValidationSettings bool allow_experimental_variant_type = true; bool allow_suspicious_variant_types = true; bool validate_nested_types = true; + bool allow_experimental_dynamic_type = true; }; void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index cceb0650fcd..3d60723a167 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -44,7 +44,7 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel pos = value; else { - if (static_cast(std::abs(value)) > columns.size()) + if (value < -static_cast(columns.size())) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Negative positional argument number {} is out of bounds. 
Expected in range [-{}, -1]", diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index d75786f33b9..7b19d338ee8 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -166,7 +166,7 @@ void getBlockSortPermutationImpl(const Block & block, const SortDescription & de for (const auto & column_with_sort_description : columns_with_sort_descriptions) { - while (!ranges.empty() && limit && limit <= ranges.back().first) + while (!ranges.empty() && limit && limit <= ranges.back().from) ranges.pop_back(); if (ranges.empty()) diff --git a/src/Interpreters/tests/gtest_actions_visitor.cpp b/src/Interpreters/tests/gtest_actions_visitor.cpp new file mode 100644 index 00000000000..3de39ae6bfa --- /dev/null +++ b/src/Interpreters/tests/gtest_actions_visitor.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + + +TEST(ActionsVisitor, VisitLiteral) +{ + DataTypePtr date_type = std::make_shared(); + DataTypePtr expect_type = std::make_shared(); + const NamesAndTypesList name_and_types = + { + {"year", date_type} + }; + + const auto ast = std::make_shared(19870); + auto context = Context::createCopy(getContext().context); + NamesAndTypesList aggregation_keys; + ColumnNumbersList aggregation_keys_indexes_list; + AggregationKeysInfo info(aggregation_keys, aggregation_keys_indexes_list, GroupByKind::NONE); + SizeLimits size_limits_for_set; + ActionsMatcher::Data visitor_data( + context, + size_limits_for_set, + size_t(0), + name_and_types, + std::make_shared(name_and_types), + std::make_shared(), + false /* no_subqueries */, + false /* no_makeset */, + false /* only_consts */, + info); + ActionsVisitor(visitor_data).visit(ast); + auto actions = visitor_data.getActions(); + ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), expect_type->getTypeId()); +} + +TEST(ActionsVisitor, VisitLiteralWithType) +{ + DataTypePtr date_type = std::make_shared(); + const NamesAndTypesList name_and_types = + { + {"year", date_type} + }; + + const auto ast = std::make_shared(19870, date_type); + auto context = Context::createCopy(getContext().context); + NamesAndTypesList aggregation_keys; + ColumnNumbersList aggregation_keys_indexes_list; + AggregationKeysInfo info(aggregation_keys, aggregation_keys_indexes_list, GroupByKind::NONE); + SizeLimits size_limits_for_set; + ActionsMatcher::Data visitor_data( + context, + size_limits_for_set, + size_t(0), + name_and_types, + std::make_shared(name_and_types), + std::make_shared(), + false /* no_subqueries */, + false /* no_makeset */, + false /* only_consts */, + info); + ActionsVisitor(visitor_data).visit(ast); + auto actions = visitor_data.getActions(); + ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), date_type->getTypeId()); +} diff --git a/src/Interpreters/tests/gtest_convertFieldToType.cpp b/src/Interpreters/tests/gtest_convertFieldToType.cpp index ea1c5c43a25..c8a9d5aa2c0 100644 --- a/src/Interpreters/tests/gtest_convertFieldToType.cpp +++ b/src/Interpreters/tests/gtest_convertFieldToType.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -24,9 +23,7 @@ std::ostream & operator << (std::ostream & ostr, const ConvertFieldToTypeTestPar { return ostr << "{" << "\n\tfrom_type : " << params.from_type - << "\n\tfrom_value : " << params.from_value << "\n\tto_type : " << params.to_type - << "\n\texpected : " << (params.expected_value ? 
*params.expected_value : Field()) << "\n}"; } diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index f794ad336e2..0bd4b94d999 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -90,10 +90,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log std::cerr << "Logging " << log_level_string << " to " << log_path << ext << std::endl; auto log_level = Poco::Logger::parseLevel(log_level_string); - if (log_level > max_log_level) - { - max_log_level = log_level; - } + max_log_level = std::max(log_level, max_log_level); // Set up two channel chains. log_file = new Poco::FileChannel; @@ -128,10 +125,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log // NOTE: we don't use notice & critical in the code, so in practice error log collects fatal & error & warning. // (!) Warnings are important, they require attention and should never be silenced / ignored. auto errorlog_level = Poco::Logger::parseLevel(config.getString("logger.errorlog_level", "notice")); - if (errorlog_level > max_log_level) - { - max_log_level = errorlog_level; - } + max_log_level = std::max(errorlog_level, max_log_level); std::string ext; if (config.getRawString("logger.stream_compress", "false") == "true") @@ -165,10 +159,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log if (config.getBool("logger.use_syslog", false)) { auto syslog_level = Poco::Logger::parseLevel(config.getString("logger.syslog_level", log_level_string)); - if (syslog_level > max_log_level) - { - max_log_level = syslog_level; - } + max_log_level = std::max(syslog_level, max_log_level); if (config.has("logger.syslog.address")) { @@ -215,10 +206,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log auto console_log_level_string = config.getString("logger.console_log_level", log_level_string); auto console_log_level = Poco::Logger::parseLevel(console_log_level_string); - if (console_log_level > max_log_level) - { - max_log_level = console_log_level; - } + max_log_level = std::max(console_log_level, max_log_level); Poco::AutoPtr pf; if (config.getString("logger.formatting.type", "") == "json") @@ -275,7 +263,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } #ifndef WITHOUT_TEXT_LOG - if (config.has("text_log")) + if (allowTextLog() && config.has("text_log")) { String text_log_level_str = config.getString("text_log.level", "trace"); int text_log_level = Poco::Logger::parseLevel(text_log_level_str); @@ -323,8 +311,7 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log const auto log_level_string = config.getString("logger.level", "trace"); int log_level = Poco::Logger::parseLevel(log_level_string); - if (log_level > max_log_level) - max_log_level = log_level; + max_log_level = std::max(log_level, max_log_level); if (log_file) split->setLevel("log", log_level); @@ -342,8 +329,7 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log if (error_log_file) { int errorlog_level = Poco::Logger::parseLevel(config.getString("logger.errorlog_level", "notice")); - if (errorlog_level > max_log_level) - max_log_level = errorlog_level; + max_log_level = std::max(errorlog_level, max_log_level); split->setLevel("errorlog", errorlog_level); } @@ -352,8 +338,7 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log if (config.getBool("logger.use_syslog", false)) { syslog_level = 
Poco::Logger::parseLevel(config.getString("logger.syslog_level", log_level_string)); - if (syslog_level > max_log_level) - max_log_level = syslog_level; + max_log_level = std::max(syslog_level, max_log_level); } split->setLevel("syslog", syslog_level); diff --git a/src/Loggers/Loggers.h b/src/Loggers/Loggers.h index 9eff731a4c5..9923d66ebcb 100644 --- a/src/Loggers/Loggers.h +++ b/src/Loggers/Loggers.h @@ -23,6 +23,10 @@ public: /// Close log files. On next log write files will be reopened. void closeLogs(Poco::Logger & logger); + virtual ~Loggers() = default; + +protected: + virtual bool allowTextLog() const { return true; } private: Poco::AutoPtr log_file; diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index fee33781c27..dc51a13e01f 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -107,6 +107,10 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) [[maybe_unused]] bool push_result = logs_queue->emplace(std::move(columns)); } + auto text_log_locked = text_log.lock(); + if (!text_log_locked) + return; + /// Also log to system.text_log table, if message is not too noisy auto text_log_max_priority_loaded = text_log_max_priority.load(std::memory_order_relaxed); if (text_log_max_priority_loaded && msg.getPriority() <= text_log_max_priority_loaded) @@ -146,10 +150,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) #undef SET_VALUE_IF_EXISTS - std::shared_ptr> text_log_locked{}; - text_log_locked = text_log.lock(); - if (text_log_locked) - text_log_locked->push(std::move(elem)); + text_log_locked->push(std::move(elem)); } #endif } diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index b75554eefc4..7ca27cf6584 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 1799b75fce4..a3cab1688c2 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -235,7 +235,7 @@ protected: class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster { public: - enum class AlterObjectType + enum class AlterObjectType : uint8_t { TABLE, DATABASE, diff --git a/src/Parsers/ASTBackupQuery.cpp b/src/Parsers/ASTBackupQuery.cpp index bdb78eaf971..5a5cb97fa5f 100644 --- a/src/Parsers/ASTBackupQuery.cpp +++ b/src/Parsers/ASTBackupQuery.cpp @@ -180,7 +180,7 @@ namespace if (settings) changes = assert_cast(settings.get())->changes; - boost::remove_erase_if( + std::erase_if( changes, [](const SettingChange & change) { diff --git a/src/Parsers/ASTCheckQuery.h b/src/Parsers/ASTCheckQuery.h index eca08b2b094..9dc4155c39d 100644 --- a/src/Parsers/ASTCheckQuery.h +++ b/src/Parsers/ASTCheckQuery.h @@ -38,11 +38,7 @@ struct ASTCheckTableQuery : public ASTQueryWithTableAndOutput protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - std::string nl_or_nothing = settings.one_line ? "" : "\n"; - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - std::string nl_or_ws = settings.one_line ? " " : "\n"; - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (settings.hilite ? 
hilite_none : ""); if (table) @@ -83,11 +79,7 @@ struct ASTCheckAllTablesQuery : public ASTQueryWithOutput protected: void formatQueryImpl(const FormatSettings & settings, FormatState & /* state */, FormatStateStacked frame) const override { - std::string nl_or_nothing = settings.one_line ? "" : "\n"; - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - std::string nl_or_ws = settings.one_line ? " " : "\n"; - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK ALL TABLES" << (settings.hilite ? hilite_none : ""); } }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 8d4373530bc..3e5c6a9d86e 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -170,6 +170,8 @@ ASTPtr ASTColumns::clone() const res->set(res->projections, projections->clone()); if (primary_key) res->set(res->primary_key, primary_key->clone()); + if (primary_key_from_columns) + res->set(res->primary_key_from_columns, primary_key_from_columns->clone()); return res; } diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index ecb9ad8169b..cf912d66b44 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -50,7 +50,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState settings.ostr << "TEMPORARY "; if (has_all_tables) - settings.ostr << "ALL TABLES "; + settings.ostr << "ALL TABLES FROM "; else if (!table && !database_and_tables && database) settings.ostr << "DATABASE "; else if (is_dictionary) diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index 701bde8cebd..eb095b5dbbc 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -40,8 +40,6 @@ public: case TableOverride: return "EXPLAIN TABLE OVERRIDE"; case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION"; } - - UNREACHABLE(); } static ExplainKind fromString(const String & str) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index cdc9a471e98..602ef8c232b 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -671,7 +671,8 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (written) { - return finishFormatWithWindow(settings, state, frame); + finishFormatWithWindow(settings, state, frame); + return; } settings.ostr << (settings.hilite ? hilite_function : "") << name; @@ -753,8 +754,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_function : "") << ')'; settings.ostr << (settings.hilite ? 
hilite_none : ""); - - return finishFormatWithWindow(settings, state, frame); + finishFormatWithWindow(settings, state, frame); } bool ASTFunction::hasSecretParts() const diff --git a/src/Parsers/ASTKillQueryQuery.h b/src/Parsers/ASTKillQueryQuery.h index 20db5576fa4..89ba474e107 100644 --- a/src/Parsers/ASTKillQueryQuery.h +++ b/src/Parsers/ASTKillQueryQuery.h @@ -9,7 +9,7 @@ namespace DB class ASTKillQueryQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster { public: - enum class Type + enum class Type : uint8_t { Query, /// KILL QUERY Mutation, /// KILL MUTATION diff --git a/src/Parsers/ASTLiteral.h b/src/Parsers/ASTLiteral.h index 0c55aceb068..b957e435e2d 100644 --- a/src/Parsers/ASTLiteral.h +++ b/src/Parsers/ASTLiteral.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -17,7 +18,14 @@ class ASTLiteral : public ASTWithAlias public: explicit ASTLiteral(Field value_) : value(std::move(value_)) {} + // This method and the custom_type are only used for Apache Gluten. + explicit ASTLiteral(Field value_, DataTypePtr & type_) : value(std::move(value_)) + { + custom_type = type_; + } + Field value; + DataTypePtr custom_type; /// For ConstantExpressionTemplate std::optional begin; diff --git a/src/Parsers/ASTSelectIntersectExceptQuery.h b/src/Parsers/ASTSelectIntersectExceptQuery.h index db00fb3df87..d2e3b1a7172 100644 --- a/src/Parsers/ASTSelectIntersectExceptQuery.h +++ b/src/Parsers/ASTSelectIntersectExceptQuery.h @@ -14,7 +14,7 @@ public: ASTPtr clone() const override; - enum class Operator + enum class Operator : uint8_t { UNKNOWN, EXCEPT_ALL, diff --git a/src/Parsers/ASTSetQuery.h b/src/Parsers/ASTSetQuery.h index 42d63944b4f..b52662b246e 100644 --- a/src/Parsers/ASTSetQuery.h +++ b/src/Parsers/ASTSetQuery.h @@ -39,7 +39,7 @@ public: QueryKind getQueryKind() const override { return QueryKind::Set; } void appendColumnName(WriteBuffer & ostr) const override; - void appendColumnNameWithoutAlias(WriteBuffer & ostr) const override { return appendColumnName(ostr); } + void appendColumnNameWithoutAlias(WriteBuffer & ostr) const override { appendColumnName(ostr); } }; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 8ca2ee0efae..e782bad797e 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -285,8 +285,6 @@ void ASTTablesInSelectQueryElement::formatImpl(const FormatSettings & settings, void ASTTablesInSelectQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - for (const auto & child : children) child->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTTablesInSelectQuery.h b/src/Parsers/ASTTablesInSelectQuery.h index 39ef0b21f6d..f3f329ca2b6 100644 --- a/src/Parsers/ASTTablesInSelectQuery.h +++ b/src/Parsers/ASTTablesInSelectQuery.h @@ -85,7 +85,7 @@ struct ASTTableJoin : public IAST /// Specification of ARRAY JOIN. struct ASTArrayJoin : public IAST { - enum class Kind + enum class Kind : uint8_t { Inner, /// If array is empty, row will not present (default). Left, /// If array is empty, leave row with default values instead of array elements.
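Note on the recurring "enum class ... : uint8_t" changes in the hunks above and below (MatchState, FrameType, BoundaryType, AlterObjectType, TokenType and others): pinning the underlying type to a single byte shrinks every struct that embeds such an enum and satisfies checks like clang-tidy's performance-enum-size. A minimal standalone sketch of the effect, not code from this patch; the type names are hypothetical:

    #include <cstdint>
    #include <iostream>

    // Without an explicit underlying type, the compiler uses int (typically 4 bytes).
    enum class KindDefault { Inner, Left, Semi, Anti };

    // With ": uint8_t" the same enumerators fit into a single byte.
    enum class KindSmall : uint8_t { Inner, Left, Semi, Anti };

    struct NodeDefault { bool flag; KindDefault kind; };  // usually 8 bytes due to int alignment
    struct NodeSmall   { bool flag; KindSmall kind; };    // usually 2 bytes

    int main()
    {
        std::cout << sizeof(NodeDefault) << ' ' << sizeof(NodeSmall) << '\n';
    }

The exact sizes are implementation-defined, but on typical 64-bit targets the difference is 8 bytes versus 2 per embedded enum.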
diff --git a/src/Parsers/Access/ASTSetRoleQuery.h b/src/Parsers/Access/ASTSetRoleQuery.h index 1146205af2b..51cdedda29d 100644 --- a/src/Parsers/Access/ASTSetRoleQuery.h +++ b/src/Parsers/Access/ASTSetRoleQuery.h @@ -13,7 +13,7 @@ class ASTRolesOrUsersSet; class ASTSetRoleQuery : public IAST { public: - enum class Kind + enum class Kind : uint8_t { SET_ROLE, SET_ROLE_DEFAULT, diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 799cd65dd5c..e79d14f860a 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -219,7 +219,7 @@ namespace void throwIfNotGrantable(AccessRightsElements & elements) { - boost::range::remove_erase_if(elements, [](AccessRightsElement & element) + std::erase_if(elements, [](AccessRightsElement & element) { if (element.empty()) return true; diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 3bc1b3a981f..278c1e00e9e 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -7,19 +7,11 @@ add_headers_and_sources(clickhouse_parsers ./Kusto) add_headers_and_sources(clickhouse_parsers ./PRQL) add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access) if (TARGET ch_rust::prql) target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql) endif () -if (USE_DEBUG_HELPERS) - # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. - # Prefixing "SHELL:" will force it to use the original text. - set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${ClickHouse_SOURCE_DIR}/base\" -include \"${ClickHouse_SOURCE_DIR}/src/Parsers/iostream_debug_helpers.h\"") - # Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system. - add_compile_options($<$:${INCLUDE_DEBUG_HELPERS}>) -endif () - if(ENABLE_EXAMPLES) add_subdirectory(examples) endif() diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index de926e83024..416f696323c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include "Parsers/CommonParsers.h" @@ -170,9 +170,17 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - /// Identifier in backquotes or in double quotes + /// Identifier in backquotes or in double quotes or in English-style Unicode double quotes if (pos->type == TokenType::QuotedIdentifier) { + /// The case of Unicode quotes. No escaping is supported. Assuming UTF-8. + if (*pos->begin == '\xE2' && pos->size() > 6) /// Empty identifiers are not allowed. 
+ { + node = std::make_shared(String(pos->begin + 3, pos->end - 3)); + ++pos; + return true; + } + ReadBufferFromMemory buf(pos->begin, pos->size()); String s; @@ -1140,18 +1148,26 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (pos->type == TokenType::StringLiteral) { - if (*pos->begin == 'x' || *pos->begin == 'X') + char first_char = *pos->begin; + + if (first_char == 'x' || first_char == 'X') { constexpr size_t word_size = 2; return makeHexOrBinStringLiteral(pos, node, true, word_size); } - if (*pos->begin == 'b' || *pos->begin == 'B') + if (first_char == 'b' || first_char == 'B') { constexpr size_t word_size = 8; return makeHexOrBinStringLiteral(pos, node, false, word_size); } + /// The case of Unicode quotes. No escaping is supported. Assuming UTF-8. + if (first_char == '\xE2' && pos->size() >= 6) + { + return makeStringLiteral(pos, node, String(pos->begin + 3, pos->end - 3)); + } + ReadBufferFromMemory in(pos->begin, pos->size()); try diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 007c2855a13..7cdfaf988a3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include @@ -146,7 +146,7 @@ static bool parseOperator(IParser::Pos & pos, std::string_view op, Expected & ex return false; } -enum class SubqueryFunctionType +enum class SubqueryFunctionType : uint8_t { NONE, ANY, @@ -457,7 +457,7 @@ namespace } -enum class Action +enum class Action : uint8_t { NONE, OPERAND, @@ -468,7 +468,7 @@ enum class Action * Operators can be grouped into some type if they have similar behaviour. * Certain operators are unique in terms of their behaviour, so they are assigned a separate type. */ -enum class OperatorType +enum class OperatorType : uint8_t { None, Comparison, @@ -521,7 +521,7 @@ static std::shared_ptr makeASTFunction(Operator & op, Args &&... ar return ast_function; } -enum class Checkpoint +enum class Checkpoint : uint8_t { None, Interval, diff --git a/src/Parsers/FieldFromAST.cpp b/src/Parsers/FieldFromAST.cpp index a81bf45a8be..ad1eab49eeb 100644 --- a/src/Parsers/FieldFromAST.cpp +++ b/src/Parsers/FieldFromAST.cpp @@ -51,7 +51,7 @@ public: { /// We allow to not hide type of the disk, e.g. disk(type = s3, ...) /// and also nested disk, e.g. 
disk = 'disk_name' - return arg_name != "type" && arg_name != "disk"; + return arg_name != "type" && arg_name != "disk" && arg_name != "name" ; }; for (const auto & arg : disk_function_args) diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index ddd210b01ec..857009680b1 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -14,10 +14,7 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { depth = rhs.depth; max_depth = rhs.max_depth; - - if (rhs.backtracks > backtracks) - backtracks = rhs.backtracks; - + backtracks = std::max(backtracks, rhs.backtracks); max_backtracks = rhs.max_backtracks; if (rhs < *this) diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 0ae862fee75..cb3684bfe5d 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -22,7 +22,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -enum class Highlight +enum class Highlight : uint8_t { none = 0, keyword, diff --git a/src/Parsers/IdentifierQuotingStyle.h b/src/Parsers/IdentifierQuotingStyle.h index 5e31969f7fb..48be809fc8f 100644 --- a/src/Parsers/IdentifierQuotingStyle.h +++ b/src/Parsers/IdentifierQuotingStyle.h @@ -6,7 +6,7 @@ namespace DB /// Method to quote identifiers. /// NOTE There could be differences in escaping rules inside quotes. Escaping rules may not match that required by specific external DBMS. -enum class IdentifierQuotingStyle +enum class IdentifierQuotingStyle : uint8_t { None, /// Write as-is, without quotes. Backticks, /// `clickhouse` style diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index f5069e80745..b518c6fb88d 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -28,7 +28,7 @@ private: class IParserKQLFunction { public: - enum class ArgumentState + enum class ArgumentState : uint8_t { Parsed, Raw diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 16436d38d32..0eb83b8b5ac 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index cca4fb3ce33..3b1f96b4ea6 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -137,7 +137,6 @@ bool DatatypeInt::convertImpl(String & out, IParser::Pos & pos) const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; - String guid_str; ++pos; if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index 77ed110700b..05de6e120e2 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -240,7 +240,7 @@ void inline getTokens(String format, std::vector & res) pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); } // Cover the last (or only) token - if (str.length() > 0) + if (!str.empty()) { token = str; res.insert(res.begin(), token); diff --git 
a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp index c4f84d576cb..19625f6624d 100644 --- a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index fbf2110e664..e508b69bdff 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -62,49 +63,51 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery kql_p; - ASTPtr select; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + ParserToken lparen(TokenType::OpeningRoundBracket); - auto begin = pos; - auto paren_count = 0; + ASTPtr string_literal; + ParserStringLiteral parser_string_literal; + + if (!lparen.ignore(pos, expected)) + return false; + + size_t paren_count = 0; String kql_statement; - - if (s_lparen.ignore(pos, expected)) + if (parser_string_literal.parse(pos, string_literal, expected)) { - if (pos->type == TokenType::HereDoc) - { - kql_statement = String(pos->begin + 2, pos->end - 2); - } - else - { - ++paren_count; - auto pos_start = pos; - while (isValidKQLPos(pos)) - { - if (pos->type == TokenType::ClosingRoundBracket) - --paren_count; - if (pos->type == TokenType::OpeningRoundBracket) - ++paren_count; - - if (paren_count == 0) - break; - ++pos; - } - kql_statement = String(pos_start->begin, (--pos)->end); - } - ++pos; - Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); - - if (kql_p.parse(pos_kql, select, expected)) - { - node = select; - ++pos; - return true; - } + kql_statement = typeid_cast(*string_literal).value.safeGet(); } - pos = begin; - return false; + else + { + ++paren_count; + auto pos_start = pos; + while (isValidKQLPos(pos)) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + ++pos; + } + if (!isValidKQLPos(pos)) + { + return false; + } + --pos; + kql_statement = String(pos_start->begin, pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.data(), kql_statement.data() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); + Expected kql_expected; + kql_expected.enable_highlighting = false; + if (!ParserKQLWithUnionQuery().parse(pos_kql, node, kql_expected)) + return false; + ++pos; + return true; } } diff --git a/src/Parsers/Kusto/parseKQLQuery.cpp b/src/Parsers/Kusto/parseKQLQuery.cpp index 34a009873f8..34076168480 100644 --- a/src/Parsers/Kusto/parseKQLQuery.cpp +++ b/src/Parsers/Kusto/parseKQLQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 0e6db7a40e5..5f2bd50524c 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include namespace DB @@ -11,8 +11,9 @@ namespace { /// This must be consistent with functions in ReadHelpers.h -template -Token quotedString(const char *& pos, const char * const token_begin, const char * 
const end) +template +Token quotedString(const char *& pos, const char * const token_begin, const char * const end, + TokenType success_token, TokenType error_token) { ++pos; while (true) @@ -41,7 +42,34 @@ Token quotedString(const char *& pos, const char * const token_begin, const char continue; } - UNREACHABLE(); + chassert(false); + } +} + +Token quotedStringWithUnicodeQuotes(const char *& pos, const char * const token_begin, const char * const end, + char expected_end_byte, TokenType success_token, TokenType error_token) +{ + /// ‘: e2 80 98 + /// ’: e2 80 99 + /// “: e2 80 9c + /// ”: e2 80 9d + + while (true) + { + pos = find_first_symbols<'\xE2'>(pos, end); + if (pos + 2 >= end) + return Token(error_token, token_begin, end); + /// Empty identifiers are not allowed, while empty strings are. + if (success_token == TokenType::QuotedIdentifier && pos + 3 >= end) + return Token(error_token, token_begin, end); + + if (pos[0] == '\xE2' && pos[1] == '\x80' && pos[2] == expected_end_byte) + { + pos += 3; + return Token(success_token, token_begin, pos); + } + + ++pos; } } @@ -224,11 +252,11 @@ Token Lexer::nextTokenImpl() } case '\'': - return quotedString<'\'', TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed>(pos, token_begin, end); + return quotedString<'\''>(pos, token_begin, end, TokenType::StringLiteral, TokenType::ErrorSingleQuoteIsNotClosed); case '"': - return quotedString<'"', TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed>(pos, token_begin, end); + return quotedString<'"'>(pos, token_begin, end, TokenType::QuotedIdentifier, TokenType::ErrorDoubleQuoteIsNotClosed); case '`': - return quotedString<'`', TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed>(pos, token_begin, end); + return quotedString<'`'>(pos, token_begin, end, TokenType::QuotedIdentifier, TokenType::ErrorBackQuoteIsNotClosed); case '(': return Token(TokenType::OpeningRoundBracket, token_begin, ++pos); @@ -434,6 +462,15 @@ Token Lexer::nextTokenImpl() pos += 3; return Token(TokenType::Minus, token_begin, pos); } + /// Unicode quoted string, ‘Hello’ or “World”. + if (pos + 5 < end && pos[0] == '\xE2' && pos[1] == '\x80' && (pos[2] == '\x98' || pos[2] == '\x9C')) + { + const char expected_end_byte = pos[2] + 1; + TokenType success_token = pos[2] == '\x98' ? TokenType::StringLiteral : TokenType::QuotedIdentifier; + TokenType error_token = pos[2] == '\x98' ? 
TokenType::ErrorSingleQuoteIsNotClosed : TokenType::ErrorDoubleQuoteIsNotClosed; + pos += 3; + return quotedStringWithUnicodeQuotes(pos, token_begin, end, expected_end_byte, success_token, error_token); + } /// Other characters starting at E2 can be parsed, see skipWhitespacesUTF8 [[fallthrough]]; } @@ -447,7 +484,7 @@ Token Lexer::nextTokenImpl() if (heredoc_name_end_position != std::string::npos) { size_t heredoc_size = heredoc_name_end_position + 1; - std::string_view heredoc = {token_stream.data(), heredoc_size}; + std::string_view heredoc = {token_stream.data(), heredoc_size}; // NOLINT size_t heredoc_end_position = token_stream.find(heredoc, heredoc_size); if (heredoc_end_position != std::string::npos) @@ -501,8 +538,6 @@ const char * getTokenName(TokenType type) APPLY_FOR_TOKENS(M) #undef M } - - UNREACHABLE(); } diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index e228dba6c1f..6f31d56292d 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -75,7 +76,7 @@ namespace DB M(ErrorMaxQuerySizeExceeded) \ -enum class TokenType +enum class TokenType : uint8_t { #define M(TOKEN) TOKEN, APPLY_FOR_TOKENS(M) diff --git a/src/Parsers/LiteralEscapingStyle.h b/src/Parsers/LiteralEscapingStyle.h index 10d4d84a85d..5c3e79038ae 100644 --- a/src/Parsers/LiteralEscapingStyle.h +++ b/src/Parsers/LiteralEscapingStyle.h @@ -5,7 +5,7 @@ namespace DB { /// Method to escape single quotes. -enum class LiteralEscapingStyle +enum class LiteralEscapingStyle : uint8_t { Regular, /// Escape backslashes with backslash (\\) and quotes with backslash (\') PostgreSQL, /// Do not escape backslashes (\), escape quotes with quote ('') diff --git a/src/Parsers/ParserBackupQuery.cpp b/src/Parsers/ParserBackupQuery.cpp index 5eab0cb288d..6d2f4d8311d 100644 --- a/src/Parsers/ParserBackupQuery.cpp +++ b/src/Parsers/ParserBackupQuery.cpp @@ -304,7 +304,7 @@ namespace changes = assert_cast(settings.get())->changes; } - boost::remove_erase_if(changes, [](const SettingChange & change) { return change.name == "async"; }); + std::erase_if(changes, [](const SettingChange & change) { return change.name == "async"; }); // NOLINT changes.emplace_back("async", async); auto new_settings = std::make_shared(); diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index fd2bbbab177..2fa34696c58 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -17,9 +17,9 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected { ParserKeyword s_type(Keyword::TYPE); ParserKeyword s_granularity(Keyword::GRANULARITY); - ParserToken open(TokenType::OpeningRoundBracket); - ParserToken close(TokenType::ClosingRoundBracket); - ParserOrderByExpressionList order_list; + ParserToken open_p(TokenType::OpeningRoundBracket); + ParserToken close_p(TokenType::ClosingRoundBracket); + ParserOrderByExpressionList order_list_p; ParserDataType data_type_p; ParserExpression expression_p; @@ -29,17 +29,41 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected ASTPtr type; ASTPtr granularity; - /// Skip name parser for SQL-standard CREATE INDEX - if (expression_p.parse(pos, expr, expected)) + if (open_p.ignore(pos, expected)) { - } - else if (open.ignore(pos, expected)) - { - if (!order_list.parse(pos, expr, expected)) + ASTPtr order_list; + if (!order_list_p.parse(pos, order_list, expected)) return false; - if (!close.ignore(pos, expected)) + if 
(!close_p.ignore(pos, expected)) return false; + + if (order_list->children.empty()) + return false; + + /// CREATE INDEX with ASC, DESC is implemented only for SQL compatibility. + /// ASC and DESC modifiers are not supported and are ignored further. + if (order_list->children.size() == 1) + { + auto order_by_elem = order_list->children[0]; + expr = order_by_elem->children[0]; + } + else + { + auto tuple_func = makeASTFunction("tuple"); + tuple_func->arguments = std::make_shared(); + + for (const auto & order_by_elem : order_list->children) + { + auto elem_expr = order_by_elem->children[0]; + tuple_func->arguments->children.push_back(std::move(elem_expr)); + } + expr = std::move(tuple_func); + } + } + else if (!expression_p.parse(pos, expr, expected)) + { + return false; } if (s_type.ignore(pos, expected)) @@ -59,7 +83,9 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected index->part_of_create_index_query = true; if (granularity) + { index->granularity = granularity->as().value.safeGet(); + } else { auto index_type = index->getType(); diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index db773427d6f..c1b45871577 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1619,6 +1619,29 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (sql_security) query->sql_security = typeid_cast>(sql_security); + if (query->columns_list && query->columns_list->primary_key) + { + /// If engine is not set will use default one + if (!query->storage) + query->set(query->storage, std::make_shared()); + else if (query->storage->primary_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + + query->storage->primary_key = query->columns_list->primary_key; + + } + + if (query->columns_list && (query->columns_list->primary_key_from_columns)) + { + /// If engine is not set will use default one + if (!query->storage) + query->set(query->storage, std::make_shared()); + else if (query->storage->primary_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + + query->storage->primary_key = query->columns_list->primary_key_from_columns; + } + tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 58eb00519d8..b5bc9f89990 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,10 +1,12 @@ #include #include +#include #include #include #include #include +#include namespace DB @@ -13,18 +15,60 @@ namespace DB namespace { +/// Parser of Dynamic type arguments: Dynamic(max_types=N) +class DynamicArgumentsParser : public IParserBase +{ +private: + const char * getName() const override { return "Dynamic data type optional argument"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ASTPtr identifier; + ParserIdentifier identifier_parser; + if (!identifier_parser.parse(pos, identifier, expected)) + return false; + + if (pos->type != TokenType::Equals) + { + expected.add(pos, "equals operator"); + return false; + } + + ++pos; + + ASTPtr number; + ParserNumber number_parser; + if (!number_parser.parse(pos, number, expected)) + return false; + + node = makeASTFunction("equals", identifier, number); + return true; + } +}; + /// Wrapper to allow mixed lists of nested and normal types. 
/// Parameters are either: /// - Nested table elements; /// - Enum element in form of 'a' = 1; /// - literal; -/// - another data type (or identifier) +/// - Dynamic type arguments; +/// - another data type (or identifier); class ParserDataTypeArgument : public IParserBase { +public: + explicit ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) + { + } + private: const char * getName() const override { return "data type argument"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override { + if (type_name == "Dynamic") + { + DynamicArgumentsParser parser; + return parser.parse(pos, node, expected); + } + ParserNestedTable nested_parser; ParserDataType data_type_parser; ParserAllCollectionsOfLiterals literal_parser(false); @@ -39,6 +83,8 @@ private: || literal_parser.parse(pos, node, expected) || data_type_parser.parse(pos, node, expected); } + + std::string_view type_name; }; } @@ -57,6 +103,13 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; tryGetIdentifierNameInto(identifier, type_name); + /// Don't accept things like Array(`x.y`). + if (!std::all_of(type_name.begin(), type_name.end(), [](char c) { return isWordCharASCII(c) || c == '$'; })) + { + expected.add(pos, "type name"); + return false; + } + String type_name_upper = Poco::toUpper(type_name); String type_name_suffix; @@ -140,7 +193,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; /// Parse optional parameters - ParserList args_parser(std::make_unique(), std::make_unique(TokenType::Comma)); + ParserList args_parser(std::make_unique(type_name), std::make_unique(TokenType::Comma)); ASTPtr expr_list_args; if (!args_parser.parse(pos, expr_list_args, expected)) diff --git a/src/Parsers/ParserDropQuery.cpp b/src/Parsers/ParserDropQuery.cpp index 6efafa112d3..38ebca83dae 100644 --- a/src/Parsers/ParserDropQuery.cpp +++ b/src/Parsers/ParserDropQuery.cpp @@ -22,6 +22,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons ParserKeyword s_dictionary(Keyword::DICTIONARY); ParserKeyword s_view(Keyword::VIEW); ParserKeyword s_database(Keyword::DATABASE); + ParserKeyword s_from(Keyword::FROM); ParserKeyword s_all(Keyword::ALL); ParserKeyword s_tables(Keyword::TABLES); ParserToken s_dot(TokenType::Dot); @@ -59,6 +60,8 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons else if (s_all.ignore(pos, expected) && s_tables.ignore(pos, expected) && kind == ASTDropQuery::Kind::Truncate) { has_all_tables = true; + if (!s_from.ignore(pos, expected)) + return false; if (s_if_exists.ignore(pos, expected)) if_exists = true; diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 0bbb181b39c..04759f80388 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -107,6 +107,9 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!columns_p.parse(pos, columns, expected)) return false; + /// Optional trailing comma + ParserToken(TokenType::Comma).ignore(pos); + if (!s_rparen.ignore(pos, expected)) return false; } diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 696cb65e5be..0545c3e5568 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -67,7 +67,7 @@ namespace ErrorCodes return true; } -enum class SystemQueryTargetType +enum class SystemQueryTargetType : uint8_t { Model, Function, diff --git 
a/src/Parsers/SelectUnionMode.h b/src/Parsers/SelectUnionMode.h index 5c72ce65eb2..fc0a766eb66 100644 --- a/src/Parsers/SelectUnionMode.h +++ b/src/Parsers/SelectUnionMode.h @@ -5,7 +5,7 @@ namespace DB { -enum class SelectUnionMode +enum class SelectUnionMode : uint8_t { UNION_DEFAULT, UNION_ALL, diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp index fa792e7c8b5..08877e0b2fe 100644 --- a/src/Parsers/TokenIterator.cpp +++ b/src/Parsers/TokenIterator.cpp @@ -4,20 +4,6 @@ namespace DB { -Tokens::Tokens(const char * begin, const char * end, size_t max_query_size, bool skip_insignificant) -{ - Lexer lexer(begin, end, max_query_size); - - bool stop = false; - do - { - Token token = lexer.nextToken(); - stop = token.isEnd() || token.type == TokenType::ErrorMaxQuerySizeExceeded; - if (token.isSignificant() || (!skip_insignificant && !data.empty() && data.back().isSignificant())) - data.emplace_back(std::move(token)); - } while (!stop); -} - UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) { /// We have just two kind of parentheses: () and []. diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index 192f2f55e6a..207ddadb8bf 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -15,25 +15,44 @@ namespace DB */ /** Used as an input for parsers. - * All whitespace and comment tokens are transparently skipped. + * All whitespace and comment tokens are transparently skipped if `skip_insignificant`. */ class Tokens { private: std::vector data; - std::size_t last_accessed_index = 0; + Lexer lexer; + bool skip_insignificant; public: - Tokens(const char * begin, const char * end, size_t max_query_size = 0, bool skip_insignificant = true); - - ALWAYS_INLINE inline const Token & operator[](size_t index) + Tokens(const char * begin, const char * end, size_t max_query_size = 0, bool skip_insignificant_ = true) + : lexer(begin, end, max_query_size), skip_insignificant(skip_insignificant_) { - assert(index < data.size()); - last_accessed_index = std::max(last_accessed_index, index); - return data[index]; } - ALWAYS_INLINE inline const Token & max() { return data[last_accessed_index]; } + const Token & operator[] (size_t index) + { + while (true) + { + if (index < data.size()) + return data[index]; + + if (!data.empty() && data.back().isEnd()) + return data.back(); + + Token token = lexer.nextToken(); + + if (!skip_insignificant || token.isSignificant()) + data.emplace_back(token); + } + } + + const Token & max() + { + if (data.empty()) + return (*this)[0]; + return data.back(); + } }; diff --git a/src/Parsers/formatSettingName.cpp b/src/Parsers/formatSettingName.cpp index efbfffddd7b..59973379167 100644 --- a/src/Parsers/formatSettingName.cpp +++ b/src/Parsers/formatSettingName.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Parsers/iostream_debug_helpers.cpp b/src/Parsers/iostream_debug_helpers.cpp deleted file mode 100644 index b74d337b22d..00000000000 --- a/src/Parsers/iostream_debug_helpers.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "iostream_debug_helpers.h" -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -std::ostream & operator<<(std::ostream & stream, const Token & what) -{ - stream << "Token (type="<< static_cast(what.type) <<"){"<< std::string{what.begin, what.end} << "}"; - return stream; -} - -std::ostream & operator<<(std::ostream & stream, const Expected & what) -{ - stream << "Expected {variants="; - dumpValue(stream, 
what.variants) - << "; max_parsed_pos=" << what.max_parsed_pos << "}"; - return stream; -} - -std::ostream & operator<<(std::ostream & stream, const IAST & what) -{ - WriteBufferFromOStream buf(stream, 4096); - buf << "IAST{"; - what.dumpTree(buf); - buf << "}"; - return stream; -} - -} diff --git a/src/Parsers/iostream_debug_helpers.h b/src/Parsers/iostream_debug_helpers.h deleted file mode 100644 index 39f52ebcbc2..00000000000 --- a/src/Parsers/iostream_debug_helpers.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include - -namespace DB -{ -struct Token; -std::ostream & operator<<(std::ostream & stream, const Token & what); - -struct Expected; -std::ostream & operator<<(std::ostream & stream, const Expected & what); - -class IAST; -std::ostream & operator<<(std::ostream & stream, const IAST & what); - -} - -#include diff --git a/src/Parsers/makeASTForLogicalFunction.cpp b/src/Parsers/makeASTForLogicalFunction.cpp index fd9b78a5f52..0d8b1f8c0dc 100644 --- a/src/Parsers/makeASTForLogicalFunction.cpp +++ b/src/Parsers/makeASTForLogicalFunction.cpp @@ -12,7 +12,7 @@ namespace DB ASTPtr makeASTForLogicalAnd(ASTs && arguments) { bool partial_result = true; - boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool + std::erase_if(arguments, [&](const ASTPtr & argument) { bool b; if (!tryGetLiteralBool(argument.get(), b)) @@ -41,7 +41,7 @@ ASTPtr makeASTForLogicalAnd(ASTs && arguments) ASTPtr makeASTForLogicalOr(ASTs && arguments) { bool partial_result = false; - boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool + std::erase_if(arguments, [&](const ASTPtr & argument) { bool b; if (!tryGetLiteralBool(argument.get(), b)) diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index 2ed551851e8..074b6797517 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 2a6abc23406..41c51267496 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -92,9 +92,7 @@ void writeQueryWithHighlightedErrorPositions( } else { - ssize_t bytes_to_hilite = UTF8::seqLength(*current_position_to_hilite); - if (bytes_to_hilite > end - current_position_to_hilite) - bytes_to_hilite = end - current_position_to_hilite; + ssize_t bytes_to_hilite = std::min(UTF8::seqLength(*current_position_to_hilite), end - current_position_to_hilite); /// Bright on red background. out << "\033[41;1m"; diff --git a/src/Parsers/queryNormalization.cpp b/src/Parsers/queryNormalization.cpp index 4a9dd8ceb98..4890ad6952d 100644 --- a/src/Parsers/queryNormalization.cpp +++ b/src/Parsers/queryNormalization.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index d62ad83c6b2..52a0d748d63 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -36,6 +36,12 @@ public: void visitImpl(const QueryTreeNodePtr & node) { + if (const auto * constant_node = node->as()) + /// Collect sets from source expression as well. + /// Most likely we will not build them, but those sets could be requested during analysis. 
+ if (constant_node->hasSourceExpression()) + collectSets(constant_node->getSourceExpression(), planner_context); + auto * function_node = node->as(); if (!function_node || !isNameOfInFunction(function_node->getFunctionName())) return; diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index ce022cb0572..b40e23a9553 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1229,8 +1229,9 @@ void Planner::buildQueryPlanIfNeeded() if (query_plan.isInitialized()) return; - LOG_TRACE(getLogger("Planner"), "Query {} to stage {}{}", - query_tree->formatConvertedASTForErrorMessage(), + LOG_TRACE( + getLogger("Planner"), + "Query to stage {}{}", QueryProcessingStage::toString(select_query_options.to_stage), select_query_options.only_analyze ? " only analyze" : ""); @@ -1506,8 +1507,9 @@ void Planner::buildPlanForQueryNode() auto & mapping = join_tree_query_plan.query_node_to_plan_step_mapping; query_node_to_plan_step_mapping.insert(mapping.begin(), mapping.end()); - LOG_TRACE(getLogger("Planner"), "Query {} from stage {} to stage {}{}", - query_tree->formatConvertedASTForErrorMessage(), + LOG_TRACE( + getLogger("Planner"), + "Query from stage {} to stage {}{}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(select_query_options.to_stage), select_query_options.only_analyze ? " only analyze" : ""); diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 2b369eaa593..837307ba2ca 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -60,6 +60,7 @@ String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_nod if (constant_node.requiresCastCall()) { + /// Projection name for constants is _ so for _cast(1, 'String') we will have _cast(1_Uint8, 'String'_String) buffer << ", '" << constant_node.getResultType()->getName() << "'_String)"; } @@ -243,8 +244,34 @@ public: } case QueryTreeNodeType::LAMBDA: { - auto lambda_hash = node->getTreeHash(); - result = "__lambda_" + toString(lambda_hash); + /// Initially, the action name was `"__lambda_" + toString(node->getTreeHash());`. 
+ /// This is not a good idea because: + /// * hash is different on initiator and shard if the default database is changed in cluster + /// * hash is reliable only within one node; any change will break queries in between versions + /// + /// Now, we calculate execution name as (names + types) for lambda arguments + action name (expression) + /// and this should be more reliable (as long as we trust the calculation of action name for functions) + + WriteBufferFromOwnString buffer; + + const auto & lambda_node = node->as(); + const auto & lambda_arguments_nodes = lambda_node.getArguments().getNodes(); + + size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size(); + for (size_t i = 0; i < lambda_arguments_nodes_size; ++i) + { + const auto & lambda_argument_node = lambda_arguments_nodes[i]; + buffer << calculateActionNodeName(lambda_argument_node); + buffer << ' '; + buffer << lambda_argument_node->as().getResultType()->getName(); + + if (i + 1 != lambda_arguments_nodes_size) + buffer << ", "; + } + + buffer << " -> " << calculateActionNodeName(lambda_node.getExpression()); + + result = buffer.str(); break; } default: diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index ad8db83d66c..f0a2845c3e8 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -51,6 +51,46 @@ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_no return result; } +bool canRemoveConstantFromGroupByKey(const ConstantNode & root) +{ + const auto & source_expression = root.getSourceExpression(); + if (!source_expression) + return true; + + std::stack nodes; + nodes.push(source_expression.get()); + while (!nodes.empty()) + { + const auto * node = nodes.top(); + nodes.pop(); + + if (node->getNodeType() == QueryTreeNodeType::QUERY) + /// Allow removing constants from scalar subqueries. We send them to all the shards. + continue; + + const auto * constant_node = node->as(); + const auto * function_node = node->as(); + if (constant_node) + { + if (!canRemoveConstantFromGroupByKey(*constant_node)) + return false; + } + else if (function_node) + { + /// Do not allow removing constants like `hostName()` + if (!function_node->getFunctionOrThrow()->isDeterministic()) + return false; + + for (const auto & child : function_node->getArguments()) + nodes.push(child.get()); + } + else + return false; + } + + return true; +} + /** Construct aggregation analysis result if query tree has GROUP BY or aggregates. * Actions before aggregation are added into actions chain, if result is not null optional. 
*/ @@ -85,6 +125,10 @@ std::optional analyzeAggregation(const QueryTreeNodeP bool group_by_use_nulls = planner_context->getQueryContext()->getSettingsRef().group_by_use_nulls && (query_node.isGroupByWithGroupingSets() || query_node.isGroupByWithRollup() || query_node.isGroupByWithCube()); + bool is_secondary_query = planner_context->getQueryContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + bool is_distributed_query = planner_context->getQueryContext()->isDistributed(); + bool check_constants_for_group_by_key = is_secondary_query || is_distributed_query; + if (query_node.hasGroupBy()) { if (query_node.isGroupByWithGroupingSets()) @@ -97,10 +141,10 @@ std::optional analyzeAggregation(const QueryTreeNodeP for (auto & grouping_set_key_node : grouping_set_keys_list_node_typed.getNodes()) { - auto is_constant_key = grouping_set_key_node->as() != nullptr; - group_by_with_constant_keys |= is_constant_key; + const auto * constant_key = grouping_set_key_node->as(); + group_by_with_constant_keys |= (constant_key != nullptr); - if (is_constant_key && !aggregates_descriptions.empty()) + if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, grouping_set_key_node); @@ -149,10 +193,10 @@ std::optional analyzeAggregation(const QueryTreeNodeP { for (auto & group_by_key_node : query_node.getGroupBy().getNodes()) { - auto is_constant_key = group_by_key_node->as() != nullptr; - group_by_with_constant_keys |= is_constant_key; + const auto * constant_key = group_by_key_node->as(); + group_by_with_constant_keys |= (constant_key != nullptr); - if (is_constant_key && !aggregates_descriptions.empty()) + if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, group_by_key_node); @@ -405,7 +449,6 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); - interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); } @@ -545,7 +588,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo * otherwise coordinator does not find it in block. 
*/ NameSet required_output_nodes_names; - if (sort_analysis_result_optional.has_value() && !planner_query_processing_info.isSecondStage()) + if (sort_analysis_result_optional.has_value() && planner_query_processing_info.isFirstStage() && planner_query_processing_info.getToStage() != QueryProcessingStage::Complete) { const auto & before_order_by_actions = sort_analysis_result_optional->before_order_by_actions; for (const auto & output_node : before_order_by_actions->getOutputs()) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 514c19b0f89..1b2a55a50b0 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -492,7 +492,7 @@ FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, throw DB::Exception( ErrorCodes::BAD_ARGUMENTS, "Parallel replicas processing with custom_key has been requested " - "(setting 'max_parallel_replcias'), but the table does not have custom_key defined for it " + "(setting 'max_parallel_replicas'), but the table does not have custom_key defined for it " " or it's invalid (setting 'parallel_replicas_custom_key')"); LOG_TRACE(getLogger("Planner"), "Processing query on a replica using custom_key '{}'", settings.parallel_replicas_custom_key.value); @@ -691,6 +691,9 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { if (max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows) table_expression_query_info.limit = max_block_size_limited; + /// Ask to read just enough rows to make the max_rows limit effective (so it has a chance to be triggered). + else if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows < std::numeric_limits::max()) + table_expression_query_info.limit = 1 + select_query_info.local_storage_limits.local_limits.size_limits.max_rows; } else { @@ -708,7 +711,15 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads if (max_streams > 1 && !is_sync_remote) - max_streams = static_cast(max_streams * settings.max_streams_to_max_threads_ratio); + { + if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; canConvertTo(streams_with_ratio)) + max_streams = static_cast(streams_with_ratio); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "Exceeded limit for `max_streams` with `max_streams_to_max_threads_ratio`. 
" + "Make sure that `max_streams * max_streams_to_max_threads_ratio` is in some reasonable boundaries, current value: {}", + streams_with_ratio); + } if (table_node) table_expression_query_info.table_expression_modifiers = table_node->getTableExpressionModifiers(); @@ -1196,7 +1207,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ } } - auto table_join = std::make_shared(settings, query_context->getGlobalTemporaryVolume()); + auto table_join = std::make_shared(settings, query_context->getGlobalTemporaryVolume(), query_context->getTempDataOnDisk()); table_join->getTableJoin() = join_node.toASTTableJoin()->as(); if (join_constant) diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 1fdf51f399f..c410b04f209 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -328,7 +328,7 @@ void buildJoinClause( { throw Exception( ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} join expression contains column from left and right table", + "JOIN {} join expression contains column from left and right table, you may try experimental support of this feature by `SET allow_experimental_join_condition = 1`", join_node.formatASTForErrorMessage()); } } @@ -363,7 +363,7 @@ void buildJoinClause( { throw Exception( ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} join expression contains column from left and right table", + "JOIN {} join expression contains column from left and right table, you may try experimental support of this feature by `SET allow_experimental_join_condition = 1`", join_node.formatASTForErrorMessage()); } } @@ -888,6 +888,14 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo return std::make_shared(table_join, right_table_expression_header); } + /** We have only one way to execute a CROSS JOIN - with a hash join. + * Therefore, for a query with an explicit CROSS JOIN, it should not fail because of the `join_algorithm` setting. + * If the user expects CROSS JOIN + WHERE to be rewritten to INNER join and to be executed with a specific algorithm, + * then the setting `cross_to_inner_join_rewrite` may be used, and unsupported cases will fail earlier. + */ + if (table_join->kind() == JoinKind::Cross) + return std::make_shared(table_join, right_table_expression_header); + if (!table_join->oneDisjunct() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH) && !table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 9ab7a8e64fe..9723a00a356 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -137,7 +137,7 @@ public: if (it == column_name_to_column.end()) { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column for column name {} does not exists. There are only column names: {}", + "Column for column name {} does not exist. There are only column names: {}", column_name, fmt::join(column_names.begin(), column_names.end(), ", ")); } @@ -154,7 +154,7 @@ public: if (it == column_name_to_column_identifier.end()) { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column identifier for column name {} does not exists. There are only column names: {}", + "Column identifier for column name {} does not exist. 
There are only column names: {}", column_name, fmt::join(column_names.begin(), column_names.end(), ", ")); } diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index ef640bcd42d..f2bc1f060d8 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -1,24 +1,25 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include #include #include #include +#include +#include +#include #include #include +#include +#include +#include +#include #include #include +#include #include #include +#include +#include namespace DB { @@ -316,7 +317,8 @@ static const TableNode * findTableForParallelReplicas(const IQueryTreeNode * que case QueryTreeNodeType::TABLE: { const auto & table_node = query_tree_node->as(); - const auto & storage = table_node.getStorage(); + const auto * as_mat_view = typeid_cast(table_node.getStorage().get()); + const auto & storage = as_mat_view ? as_mat_view->getTargetTable() : table_node.getStorage(); if (std::dynamic_pointer_cast(storage) || typeid_cast(storage.get())) return &table_node; @@ -412,17 +414,16 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas( Block header = InterpreterSelectQueryAnalyzer::getSampleBlock( modified_query_tree, context, SelectQueryOptions(processed_stage).analyze()); - ClusterProxy::SelectStreamFactory select_stream_factory = - ClusterProxy::SelectStreamFactory( - header, - {}, - {}, - processed_stage); + const TableNode * table_node = findTableForParallelReplicas(modified_query_tree.get()); + if (!table_node) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't determine table for parallel replicas"); QueryPlan query_plan; ClusterProxy::executeQueryWithParallelReplicas( query_plan, - select_stream_factory, + table_node->getStorageID(), + header, + processed_stage, modified_query_ast, context, storage_limits); diff --git a/src/Processors/Executors/ExecutingGraph.h b/src/Processors/Executors/ExecutingGraph.h index e6d41321edd..71dcd360a2c 100644 --- a/src/Processors/Executors/ExecutingGraph.h +++ b/src/Processors/Executors/ExecutingGraph.h @@ -64,7 +64,7 @@ public: /// Status for processor. /// Can be owning or not. Owning means that executor who set this status can change node's data and nobody else can. - enum class ExecStatus + enum class ExecStatus : uint8_t { Idle, /// prepare returned NeedData or PortFull. Non-owning. Preparing, /// some executor is preparing processor, or processor is in task_queue. Owning. 
diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index 1039cf0e97a..7e3bee239ef 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -192,8 +192,7 @@ void ExecutorTasks::fill(Queue & queue) void ExecutorTasks::upscale(size_t use_threads_) { std::lock_guard lock(mutex); - if (use_threads < use_threads_) - use_threads = use_threads_; + use_threads = std::max(use_threads, use_threads_); } void ExecutorTasks::processAsyncTasks() diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 5b5880759e6..49ec9999521 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -100,8 +100,7 @@ void PipelineExecutor::finish() void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) { checkTimeLimit(); - if (num_threads < 1) - num_threads = 1; + num_threads = std::max(num_threads, 1); OpenTelemetry::SpanHolder span("PipelineExecutor::execute()"); span.addAttribute("clickhouse.thread_num", num_threads); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index fc9a827be66..72a93002669 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -115,7 +115,9 @@ const BlockMissingValues & ArrowBlockInputFormat::getMissingValues() const static std::shared_ptr createStreamReader(ReadBuffer & in) { - auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(std::make_unique(in)); + auto options = arrow::ipc::IpcReadOptions::Defaults(); + options.memory_pool = ArrowMemoryPool::instance(); + auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(std::make_unique(in), options); if (!stream_reader_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", stream_reader_status.status().ToString()); @@ -128,7 +130,9 @@ static std::shared_ptr createFileReader(ReadB if (is_stopped) return nullptr; - auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(arrow_file); + auto options = arrow::ipc::IpcReadOptions::Defaults(); + options.memory_pool = ArrowMemoryPool::instance(); + auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(arrow_file, options); if (!file_reader_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", file_reader_status.status().ToString()); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 84375ccd5ce..88cca68e1a3 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -100,7 +101,7 @@ arrow::Result RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbyt arrow::Result> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes) { - ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes)) + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes, ArrowMemoryPool::instance())) ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data())) if (bytes_read < nbytes) @@ -157,7 +158,7 @@ arrow::Result ArrowInputStreamFromReadBuffer::Read(int64_t nbytes, void arrow::Result> ArrowInputStreamFromReadBuffer::Read(int64_t nbytes) { - 
ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes)) + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes, ArrowMemoryPool::instance())) ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data())) if (bytes_read < nbytes) @@ -193,7 +194,8 @@ arrow::Result RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_ { try { - return in.readBigAt(reinterpret_cast(out), nbytes, position, nullptr); + int64_t r = in.readBigAt(reinterpret_cast(out), nbytes, position, nullptr); + return r; } catch (...) { @@ -205,7 +207,7 @@ arrow::Result RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_ arrow::Result> RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_t position, int64_t nbytes) { - ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes)) + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes, ArrowMemoryPool::instance())) ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, ReadAt(position, nbytes, buffer->mutable_data())) if (bytes_read < nbytes) @@ -231,6 +233,71 @@ arrow::Result RandomAccessFileFromRandomAccessReadBuffer::Tell() const arrow::Result RandomAccessFileFromRandomAccessReadBuffer::Read(int64_t, void*) { return arrow::Status::NotImplemented(""); } arrow::Result> RandomAccessFileFromRandomAccessReadBuffer::Read(int64_t) { return arrow::Status::NotImplemented(""); } +ArrowMemoryPool * ArrowMemoryPool::instance() +{ + static ArrowMemoryPool x; + return &x; +} + +arrow::Status ArrowMemoryPool::Allocate(int64_t size, int64_t alignment, uint8_t ** out) +{ + if (size == 0) + { + *out = arrow::memory_pool::internal::kZeroSizeArea; + return arrow::Status::OK(); + } + + try // is arrow exception-safe? idk, let's avoid throwing, just in case + { + void * p = Allocator().alloc(size_t(size), size_t(alignment)); + *out = reinterpret_cast(p); + } + catch (...) + { + return arrow::Status::OutOfMemory("allocation of size ", size, " failed"); + } + + return arrow::Status::OK(); +} + +arrow::Status ArrowMemoryPool::Reallocate(int64_t old_size, int64_t new_size, int64_t alignment, uint8_t ** ptr) +{ + if (old_size == 0) + { + chassert(*ptr == arrow::memory_pool::internal::kZeroSizeArea); + return Allocate(new_size, alignment, ptr); + } + if (new_size == 0) + { + Free(*ptr, old_size, alignment); + *ptr = arrow::memory_pool::internal::kZeroSizeArea; + return arrow::Status::OK(); + } + + try + { + void * p = Allocator().realloc(*ptr, size_t(old_size), size_t(new_size), size_t(alignment)); + *ptr = reinterpret_cast(p); + } + catch (...) 
+ { + return arrow::Status::OutOfMemory("reallocation of size ", new_size, " failed"); + } + + return arrow::Status::OK(); +} + +void ArrowMemoryPool::Free(uint8_t * buffer, int64_t size, int64_t /*alignment*/) +{ + if (size == 0) + { + chassert(buffer == arrow::memory_pool::internal::kZeroSizeArea); + return; + } + + Allocator().free(buffer, size_t(size)); +} + std::shared_ptr asArrowFile( ReadBuffer & in, diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index f455bcdfb1a..e7b3e846a24 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -6,6 +6,7 @@ #include #include +#include #define ORC_MAGIC_BYTES "ORC" #define PARQUET_MAGIC_BYTES "PAR1" @@ -124,6 +125,27 @@ private: ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer); }; +/// By default, arrow allocated memory using posix_memalign(), which is currently not equipped with +/// clickhouse memory tracking. This adapter adds memory tracking. +class ArrowMemoryPool : public arrow::MemoryPool +{ +public: + static ArrowMemoryPool * instance(); + + arrow::Status Allocate(int64_t size, int64_t alignment, uint8_t ** out) override; + arrow::Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment, uint8_t ** ptr) override; + void Free(uint8_t * buffer, int64_t size, int64_t alignment) override; + + std::string backend_name() const override { return "clickhouse"; } + + int64_t bytes_allocated() const override { return 0; } + int64_t total_bytes_allocated() const override { return 0; } + int64_t num_allocations() const override { return 0; } + +private: + ArrowMemoryPool() = default; +}; + std::shared_ptr asArrowFile( ReadBuffer & in, const FormatSettings & settings, diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ec2d17d73cb..ed91913de4d 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1133,7 +1134,7 @@ static void checkStatus(const arrow::Status & status, const String & column_name /// Create empty arrow column using specified field static std::shared_ptr createArrowColumn(const std::shared_ptr & field, const String & format_name) { - arrow::MemoryPool * pool = arrow::default_memory_pool(); + arrow::MemoryPool * pool = ArrowMemoryPool::instance(); std::unique_ptr array_builder; arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); checkStatus(status, field->name(), format_name); diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index 340bcc8aae5..6a3475a1830 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int TOO_LARGE_STRING_SIZE; extern const int UNKNOWN_TYPE; + extern const int TYPE_MISMATCH; } namespace @@ -820,7 +821,13 @@ bool BSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi /// Fill non-visited columns with the default values. 
for (size_t i = 0; i < num_columns; ++i) if (!seen_columns[i]) - header.getByPosition(i).type->insertDefaultInto(*columns[i]); + { + const auto & type = header.getByPosition(i).type; + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column of type '{}' at index {}", type->getName(), i); + else + type->insertDefaultInto(*columns[i]); + } if (format_settings.defaults_for_omitted_fields) ext.read_columns = read_columns; diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 2b40e796c5c..58bf4c1a2fc 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -418,7 +419,7 @@ namespace DB /// Convert dictionary values to arrow array. auto value_type = assert_cast(builder->type().get())->value_type(); std::unique_ptr values_builder; - arrow::MemoryPool* pool = arrow::default_memory_pool(); + arrow::MemoryPool* pool = ArrowMemoryPool::instance(); arrow::Status status = MakeBuilder(pool, value_type, &values_builder); checkStatus(status, column->getName(), format_name); @@ -1025,7 +1026,7 @@ namespace DB arrow_fields.emplace_back(std::make_shared(header_column.name, arrow_type, is_column_nullable)); } - arrow::MemoryPool * pool = arrow::default_memory_pool(); + arrow::MemoryPool * pool = ArrowMemoryPool::instance(); std::unique_ptr array_builder; arrow::Status status = MakeBuilder(pool, arrow_fields[column_i]->type(), &array_builder); checkStatus(status, column->getName(), format_name); diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index ab16aaa56ad..58f78e5af42 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -80,7 +80,7 @@ public: bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } bool checkForSuffixImpl(bool check_eof); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } + void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } EscapingRule getEscapingRule() const override { return format_settings.custom.escaping_rule; } diff --git a/src/Processors/Formats/Impl/FormRowInputFormat.cpp b/src/Processors/Formats/Impl/FormRowInputFormat.cpp new file mode 100644 index 00000000000..d3c6f3798cc --- /dev/null +++ b/src/Processors/Formats/Impl/FormRowInputFormat.cpp @@ -0,0 +1,178 @@ +#include +#include "Formats/EscapingRuleUtils.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + +namespace +{ + String readFieldName(ReadBuffer & buf) + { + String field; + readStringUntilEquals(field, buf); + assertChar('=', buf); + return field; + } +} + +FormRowInputFormat::FormRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_) : IRowInputFormat(std::move(header_), in_, params_), format_settings(format_settings_) +{ + const auto & header = getPort().getHeader(); + size_t num_columns = header.columns(); + for (size_t i = 0; i < num_columns; ++i) + name_map[header.getByPosition(i).name] = i; +} + +void FormRowInputFormat::readPrefix() +{ + skipBOMIfExists(*in); +} + +const String & 
FormRowInputFormat::columnName(size_t i) const +{ + return getPort().getHeader().getByPosition(i).name; +} + +void FormRowInputFormat::readField(size_t index, MutableColumns & columns) +{ + if (seen_columns[index]) + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate field found while parsing Form format: {}", columnName(index)); + + seen_columns[index] = true; + const auto & serialization = serializations[index]; + + String encoded_str, decoded_str; + readStringUntilAmpersand(encoded_str,*in); + + if (!in->eof()) + assertChar('&', *in); + + Poco::URI::decode(encoded_str, decoded_str); + ReadBufferFromString buf(decoded_str); + serialization->deserializeWholeText(*columns[index], buf, format_settings); +} + +void FormRowInputFormat::readFormData(MutableColumns & columns) +{ + size_t index = 0; + StringRef name_ref; + while (true) + { + if (in->eof()) + break; + + auto tmp = readFieldName(*in); + name_ref = StringRef(tmp); + auto * it = name_map.find(name_ref); + + if (!it) + { + if (!format_settings.skip_unknown_fields) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field found while parsing Form format: {}", name_ref.toString()); + + /// Skip the value if key is not found. + String encoded_str; + readStringUntilAmpersand(encoded_str, *in); + + if (!in->eof()) + assertChar('&',*in); + + } + else + { + index = it->getMapped(); + readField(index, columns); + } + } +} + +bool FormRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) +{ + if (in->eof()) + return false; + + size_t num_columns = columns.size(); + seen_columns.assign(num_columns, false); + + readFormData(columns); + + const auto & header = getPort().getHeader(); + /// Non-visited columns get filled with default values + for (size_t i = 0; i < num_columns; ++i) + if (!seen_columns[i]) + header.getByPosition(i).type->insertDefaultInto(*columns[i]); + + /// Return info about defaults set. + /// If defaults_for_omitted_fields is set to 0, then we leave already inserted defaults. 
+ if (format_settings.defaults_for_omitted_fields) + ext.read_columns = seen_columns; + else + ext.read_columns.assign(seen_columns.size(), true); + return true; +} + +void FormRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + seen_columns.clear(); +} + +FormSchemaReader::FormSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : IRowWithNamesSchemaReader(in_, format_settings_,getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped)) +{ +} + +NamesAndTypesList readRowAndGetNamesAndDataTypesForFormRow(ReadBuffer & in, const FormatSettings & settings) +{ + NamesAndTypesList names_and_types; + String value; + String decoded_value; + do + { + auto name = readFieldName(in); + readStringUntilAmpersand(value,in); + Poco::URI::decode(value, decoded_value); + auto type = tryInferDataTypeByEscapingRule(decoded_value, settings, FormatSettings::EscapingRule::Raw); + names_and_types.emplace_back(name, type); + } + while (checkChar('&',in)); + return names_and_types; +} + +NamesAndTypesList FormSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof) +{ + if (in.eof()) + { + eof = true; + return {}; + } + return readRowAndGetNamesAndDataTypesForFormRow(in, format_settings); +} + +void registerInputFormatForm(FormatFactory & factory) +{ + factory.registerInputFormat("Form", []( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, std::move(params),settings); + }); +} + +void registerFormSchemaReader(FormatFactory & factory) +{ + factory.registerSchemaReader("Form", [](ReadBuffer & buffer, const FormatSettings & settings) + { + return std::make_shared(buffer, settings); + }); +} + +} diff --git a/src/Processors/Formats/Impl/FormRowInputFormat.h b/src/Processors/Formats/Impl/FormRowInputFormat.h new file mode 100644 index 00000000000..00a90f39e8e --- /dev/null +++ b/src/Processors/Formats/Impl/FormRowInputFormat.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class ReadBuffer; + +class FormRowInputFormat final : public IRowInputFormat +{ +public: + FormRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_); + String getName() const override { return "FormInputFormat"; } + void resetParser() override; + +private: + void readPrefix() override; + bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + void readFormData(MutableColumns & columns); + void readField(size_t index, MutableColumns & columns); + const String & columnName(size_t i) const; + + /// Hash table matches field name to position in the block + using NameMap = HashMap; + NameMap name_map; + +protected: + const FormatSettings format_settings; + std::vector seen_columns; +}; + +class FormSchemaReader : public IRowWithNamesSchemaReader +{ +public: + FormSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); +private: + NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override; + NamesAndTypesList readRowAndGetNamesAndDataTypesForForm(ReadBuffer & in, const FormatSettings & settings); +}; + +} + diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index faa4f36bbb0..e61e55efc8e 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -13,6 +13,7 @@ namespace 
ErrorCodes { extern const int INCORRECT_DATA; extern const int EMPTY_DATA_PASSED; + extern const int TYPE_MISMATCH; } @@ -194,6 +195,8 @@ Chunk JSONColumnsBlockInputFormatBase::read() { if (!seen_columns[i]) { + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(fields[i].type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column `{}` of type '{}'", fields[i].name, fields[i].type->getName()); columns[i]->insertManyDefaults(rows); if (format_settings.defaults_for_omitted_fields) block_missing_values.setBits(i, rows); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index a78d8d016cd..8855a1bc28d 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes extern const int INCORRECT_DATA; extern const int CANNOT_READ_ALL_DATA; extern const int LOGICAL_ERROR; + extern const int TYPE_MISMATCH; } namespace @@ -233,7 +234,14 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi /// Fill non-visited columns with the default values. for (size_t i = 0; i < num_columns; ++i) if (!seen_columns[i]) - header.getByPosition(i).type->insertDefaultInto(*columns[i]); + { + const auto & type = header.getByPosition(i).type; + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column `{}` of type '{}'", columnName(i), type->getName()); + else + type->insertDefaultInto(*columns[i]); + } + /// Return info about defaults set. /// If defaults_for_omitted_fields is set to 0, we should just leave already inserted defaults. 
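The Form input format introduced above reads URL-encoded key=value pairs separated by '&', percent-decoding each value with Poco::URI::decode before deserializing it into the target column. Below is a minimal standalone sketch of that parsing loop, stripped of the ClickHouse column machinery; the sample input and variable names are illustrative, not taken from the patch:

#include <Poco/URI.h>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

int main()
{
    // Hypothetical form body, e.g. as sent by an HTML form or curl --data.
    const std::string body = "name=ClickHouse&comment=fast%20column%20store";

    std::map<std::string, std::string> fields;
    std::istringstream in(body);
    std::string pair;
    while (std::getline(in, pair, '&'))          // fields are separated by '&'
    {
        auto eq = pair.find('=');                // the field name ends at '='
        if (eq == std::string::npos)
            continue;
        std::string value;
        Poco::URI::decode(pair.substr(eq + 1), value);  // percent-decode the value
        fields[pair.substr(0, eq)] = value;
    }

    for (const auto & [k, v] : fields)
        std::cout << k << " -> " << v << '\n';
}
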
diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 98cbdeaaa4b..6b7f1f5206c 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -657,7 +657,6 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type()); } } - UNREACHABLE(); } std::optional MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 2fa5c1d2850..0b55f633c6a 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -1548,7 +1548,7 @@ static ColumnWithTypeAndName readColumnFromORCColumn( if (pos) nested_type_hint = tuple_type_hint->getElement(*pos); } - else if (size_t(i) < tuple_type_hint->getElements().size()) + else if (i < tuple_type_hint->getElements().size()) nested_type_hint = tuple_type_hint->getElement(i); } diff --git a/src/Processors/Formats/Impl/NpyOutputFormat.cpp b/src/Processors/Formats/Impl/NpyOutputFormat.cpp new file mode 100644 index 00000000000..e02787b4f70 --- /dev/null +++ b/src/Processors/Formats/Impl/NpyOutputFormat.cpp @@ -0,0 +1,269 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_MANY_COLUMNS; + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +template +void writeNumpyNumbers(const ColumnPtr & column, WriteBuffer & buf) +{ + const auto * number_column = assert_cast(column.get()); + for (size_t i = 0; i < number_column->size(); ++i) + writeBinaryLittleEndian(ValueType(number_column->getElement(i)), buf); +} + +template +void writeNumpyStrings(const ColumnPtr & column, size_t length, WriteBuffer & buf) +{ + const auto * string_column = assert_cast(column.get()); + for (size_t i = 0; i < string_column->size(); ++i) + { + auto data = string_column->getDataAt(i); + buf.write(data.data, data.size); + writeChar(0, length - data.size, buf); + } +} + +} + +String NpyOutputFormat::shapeStr() const +{ + WriteBufferFromOwnString shape; + writeIntText(num_rows, shape); + writeChar(',', shape); + for (UInt64 dim : numpy_shape) + { + writeIntText(dim, shape); + writeChar(',', shape); + } + + return shape.str(); +} + +NpyOutputFormat::NpyOutputFormat(WriteBuffer & out_, const Block & header_) : IOutputFormat(header_, out_) +{ + const auto & header = getPort(PortKind::Main).getHeader(); + auto data_types = header.getDataTypes(); + if (data_types.size() > 1) + throw Exception(ErrorCodes::TOO_MANY_COLUMNS, "Expected single column for Npy output format, got {}", data_types.size()); + data_type = data_types[0]; + + if (!getNumpyDataType(data_type)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type {} is not supported for Npy output format", nested_data_type->getName()); +} + +bool NpyOutputFormat::getNumpyDataType(const DataTypePtr & type) +{ + switch (type->getTypeId()) + { + case TypeIndex::Int8: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(Int8), true); + break; + case TypeIndex::Int16: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(Int16), true); + break; + case TypeIndex::Int32: + 
numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(Int32), true); + break; + case TypeIndex::Int64: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(Int64), true); + break; + case TypeIndex::UInt8: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(UInt8), false); + break; + case TypeIndex::UInt16: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(UInt16), false); + break; + case TypeIndex::UInt32: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(UInt32), false); + break; + case TypeIndex::UInt64: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(UInt64), false); + break; + case TypeIndex::Float32: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(Float32)); + break; + case TypeIndex::Float64: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::LITTLE, sizeof(Float64)); + break; + case TypeIndex::FixedString: + numpy_data_type = std::make_shared( + NumpyDataType::Endianness::NONE, assert_cast(type.get())->getN()); + break; + case TypeIndex::String: + numpy_data_type = std::make_shared(NumpyDataType::Endianness::NONE, 0); + break; + case TypeIndex::Array: + return getNumpyDataType(assert_cast(type.get())->getNestedType()); + default: + nested_data_type = type; + return false; + } + + nested_data_type = type; + return true; +} + +void NpyOutputFormat::consume(Chunk chunk) +{ + if (!invalid_shape) + { + num_rows += chunk.getNumRows(); + const auto & column = chunk.getColumns()[0]; + + if (!is_initialized) + { + initShape(column); + is_initialized = true; + } + + ColumnPtr nested_column = column; + checkShape(nested_column); + updateSizeIfTypeString(nested_column); + columns.push_back(nested_column); + } +} + +void NpyOutputFormat::initShape(const ColumnPtr & column) +{ + ColumnPtr nested_column = column; + while (const auto * array_column = typeid_cast(nested_column.get())) + { + auto dim = array_column->getOffsets()[0]; + invalid_shape = dim == 0; + numpy_shape.push_back(dim); + nested_column = array_column->getDataPtr(); + } + + if (invalid_shape) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Shape ({}) is invalid, as dimension size cannot be 0", shapeStr()); +} + +void NpyOutputFormat::checkShape(ColumnPtr & column) +{ + int dim = 0; + while (const auto * array_column = typeid_cast(column.get())) + { + const auto & array_offset = array_column->getOffsets(); + + for (size_t i = 0; i < array_offset.size(); ++i) + if (array_offset[i] - array_offset[i - 1] != numpy_shape[dim]) + { + invalid_shape = true; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ClickHouse doesn't support object types, cannot format ragged nested sequences (which is a list of arrays with different shapes)"); + } + + column = array_column->getDataPtr(); + dim += 1; + } +} + +void NpyOutputFormat::updateSizeIfTypeString(const ColumnPtr & column) +{ + if (nested_data_type->getTypeId() == TypeIndex::String) + { + const auto & string_offsets = assert_cast(column.get())->getOffsets(); + for (size_t i = 0; i < string_offsets.size(); ++i) + { + size_t string_length = static_cast(string_offsets[i] - 1 - string_offsets[i - 1]); + if (numpy_data_type->getSize() < string_length) + numpy_data_type->setSize(string_length); + } + } +} + +void NpyOutputFormat::finalizeImpl() +{ + if (!invalid_shape) + { + writeHeader(); + writeColumns(); + } +} + +void NpyOutputFormat::writeHeader() +{ + String dict = "{'descr':'" + 
numpy_data_type->str() + "','fortran_order':False,'shape':(" + shapeStr() + "),}"; + String padding = "\n"; + + /// completes the length of the header, which is divisible by 64. + size_t dict_length = dict.length() + 1; + size_t header_length = STATIC_HEADER_LENGTH + sizeof(UInt32) + dict_length; + if (header_length % 64) + { + header_length = ((header_length / 64) + 1) * 64; + dict_length = header_length - STATIC_HEADER_LENGTH - sizeof(UInt32); + padding = std::string(dict_length - dict.length(), '\x20'); + padding.back() = '\n'; + } + + out.write(STATIC_HEADER, STATIC_HEADER_LENGTH); + writeBinaryLittleEndian(static_cast(dict_length), out); + out.write(dict.data(), dict.length()); + out.write(padding.data(), padding.length()); +} + +void NpyOutputFormat::writeColumns() +{ + for (const auto & column : columns) + { + switch (nested_data_type->getTypeId()) + { + case TypeIndex::Int8: writeNumpyNumbers(column, out); break; + case TypeIndex::Int16: writeNumpyNumbers(column, out); break; + case TypeIndex::Int32: writeNumpyNumbers(column, out); break; + case TypeIndex::Int64: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt8: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt16: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt32: writeNumpyNumbers(column, out); break; + case TypeIndex::UInt64: writeNumpyNumbers(column, out); break; + case TypeIndex::Float32: writeNumpyNumbers(column, out); break; + case TypeIndex::Float64: writeNumpyNumbers(column, out); break; + case TypeIndex::FixedString: + writeNumpyStrings(column, numpy_data_type->getSize(), out); + break; + case TypeIndex::String: + writeNumpyStrings(column, numpy_data_type->getSize(), out); + break; + default: + break; + } + } +} + +void registerOutputFormatNpy(FormatFactory & factory) +{ + factory.registerOutputFormat("Npy",[]( + WriteBuffer & buf, + const Block & sample, + const FormatSettings &) + { + return std::make_shared(buf, sample); + }); + factory.markFormatHasNoAppendSupport("Npy"); +} + +} diff --git a/src/Processors/Formats/Impl/NpyOutputFormat.h b/src/Processors/Formats/Impl/NpyOutputFormat.h new file mode 100644 index 00000000000..5dd6552ac0c --- /dev/null +++ b/src/Processors/Formats/Impl/NpyOutputFormat.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +/** Stream for output data in Npy format. 
+ * https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html + */ +class NpyOutputFormat : public IOutputFormat +{ +public: + NpyOutputFormat(WriteBuffer & out_, const Block & header_); + + String getName() const override { return "NpyOutputFormat"; } + + String getContentType() const override { return "application/octet-stream"; } + +private: + String shapeStr() const; + + bool getNumpyDataType(const DataTypePtr & type); + + void consume(Chunk) override; + void initShape(const ColumnPtr & column); + void checkShape(ColumnPtr & column); + void updateSizeIfTypeString(const ColumnPtr & column); + + void finalizeImpl() override; + void writeHeader(); + void writeColumns(); + + bool is_initialized = false; + bool invalid_shape = false; + + DataTypePtr data_type; + DataTypePtr nested_data_type; + std::shared_ptr numpy_data_type; + UInt64 num_rows = 0; + std::vector numpy_shape; + Columns columns; + + /// static header (version 3.0) + constexpr static auto STATIC_HEADER = "\x93NUMPY\x03\x00"; + constexpr static size_t STATIC_HEADER_LENGTH = 8; +}; + +} diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index aa83b87b2d2..a3c218fa26e 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -103,7 +103,7 @@ static void getFileReaderAndSchema( if (is_stopped) return; - auto result = arrow::adapters::orc::ORCFileReader::Open(arrow_file, arrow::default_memory_pool()); + auto result = arrow::adapters::orc::ORCFileReader::Open(arrow_file, ArrowMemoryPool::instance()); if (!result.ok()) throw Exception::createDeprecated(result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); file_reader = std::move(result).ValueOrDie(); diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index 3e8069cba0d..341141dd633 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -205,7 +205,7 @@ private: }; /// Some information about what methods to call from internal parser. 
- enum class ProcessingUnitType + enum class ProcessingUnitType : uint8_t { START, PLAIN, diff --git a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h new file mode 100644 index 00000000000..2c78949e8e1 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +namespace parquet +{ + +class PageReader; +class ColumnChunkMetaData; +class DataPageV1; +class DataPageV2; + +} + +namespace DB +{ + +class ParquetColumnReader +{ +public: + virtual ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) = 0; + + virtual ~ParquetColumnReader() = default; +}; + +using ParquetColReaderPtr = std::unique_ptr; +using ParquetColReaders = std::vector; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h new file mode 100644 index 00000000000..57df6f59f72 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -0,0 +1,182 @@ +#pragma once + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int PARQUET_EXCEPTION; +} + +template struct ToArrowDecimal; + +template <> struct ToArrowDecimal>> +{ + using ArrowDecimal = arrow::Decimal128; +}; + +template <> struct ToArrowDecimal>> +{ + using ArrowDecimal = arrow::Decimal256; +}; + + +class ParquetDataBuffer +{ +private: + +public: + ParquetDataBuffer(const uint8_t * data_, UInt64 available_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale) + : data(reinterpret_cast(data_)), available(available_), datetime64_scale(datetime64_scale_) {} + + template + void ALWAYS_INLINE readValue(TValue & dst) + { + readBytes(&dst, sizeof(TValue)); + } + + void ALWAYS_INLINE readBytes(void * dst, size_t bytes) + { + checkAvaible(bytes); + std::copy(data, data + bytes, reinterpret_cast(dst)); + consume(bytes); + } + + void ALWAYS_INLINE readDateTime64FromInt96(DateTime64 & dst) + { + static const int max_scale_num = 9; + static const UInt64 pow10[max_scale_num + 1] + = {1000000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1}; + static const UInt64 spd = 60 * 60 * 24; + static const UInt64 scaled_day[max_scale_num + 1] + = {spd, + 10 * spd, + 100 * spd, + 1000 * spd, + 10000 * spd, + 100000 * spd, + 1000000 * spd, + 10000000 * spd, + 100000000 * spd, + 1000000000 * spd}; + + parquet::Int96 tmp; + readValue(tmp); + auto decoded = parquet::DecodeInt96Timestamp(tmp); + + uint64_t scaled_nano = decoded.nanoseconds / pow10[datetime64_scale]; + dst = static_cast(decoded.days_since_epoch * scaled_day[datetime64_scale] + scaled_nano); + } + + /** + * This method should only be used to read string whose elements size is small. 
+ * Because memcpySmallAllowReadWriteOverflow15 instead of memcpy is used according to ColumnString::indexImpl + */ + void ALWAYS_INLINE readString(ColumnString & column, size_t cursor) + { + // refer to: PlainByteArrayDecoder::DecodeArrowDense in encoding.cc + // deserializeBinarySSE2 in SerializationString.cpp + checkAvaible(4); + auto value_len = ::arrow::util::SafeLoadAs(getArrowData()); + if (unlikely(value_len < 0 || value_len > INT32_MAX - 4)) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Invalid or corrupted value_len '{}'", value_len); + } + consume(4); + checkAvaible(value_len); + + auto chars_cursor = column.getChars().size(); + column.getChars().resize(chars_cursor + value_len + 1); + + memcpySmallAllowReadWriteOverflow15(&column.getChars()[chars_cursor], data, value_len); + column.getChars().back() = 0; + + column.getOffsets().data()[cursor] = column.getChars().size(); + consume(value_len); + } + + template + void ALWAYS_INLINE readOverBigDecimal(TDecimal * out, Int32 elem_bytes_num) + { + using TArrowDecimal = typename ToArrowDecimal::ArrowDecimal; + + checkAvaible(elem_bytes_num); + + // refer to: RawBytesToDecimalBytes in reader_internal.cc, Decimal128::FromBigEndian in decimal.cc + auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num); + assert(status.ok()); + status.ValueUnsafe().ToBytes(reinterpret_cast(out)); + consume(elem_bytes_num); + } + +private: + const Int8 * data; + UInt64 available; + const UInt8 datetime64_scale; + + void ALWAYS_INLINE checkAvaible(UInt64 num) + { + if (unlikely(available < num)) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Consuming {} bytes while {} available", num, available); + } + } + + const uint8_t * ALWAYS_INLINE getArrowData() { return reinterpret_cast(data); } + + void ALWAYS_INLINE consume(UInt64 num) + { + data += num; + available -= num; + } +}; + + +class LazyNullMap +{ +public: + explicit LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} + + template + requires std::is_integral_v + void setNull(T cursor) + { + initialize(); + null_map[cursor] = 1; + } + + template + requires std::is_integral_v + void setNull(T cursor, UInt32 count) + { + initialize(); + memset(null_map + cursor, 1, count); + } + + ColumnPtr getNullableCol() { return col_nullable; } + +private: + UInt64 size; + UInt8 * null_map; + ColumnPtr col_nullable; + + void initialize() + { + if (likely(col_nullable)) + { + return; + } + auto col = ColumnVector::create(size); + null_map = col->getData().data(); + col_nullable = std::move(col); + memset(null_map, 0, size); + } +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp new file mode 100644 index 00000000000..b8e4db8700c --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -0,0 +1,585 @@ +#include "ParquetDataValuesReader.h" + +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +RleValuesReader::RleValuesReader( + std::unique_ptr bit_reader_, Int32 bit_width_) + : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) +{ + if (unlikely(bit_width >= 64)) + { + // e.g. 
in GetValue_ in bit_stream_utils.h, uint64 type is used to read bit values + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported bit width {}", bit_width); + } +} + +void RleValuesReader::nextGroup() +{ + // refer to: + // RleDecoder::NextCounts in rle_encoding.h and VectorizedRleValuesReader::readNextGroup in Spark + UInt32 indicator_value = 0; + [[maybe_unused]] auto read_res = bit_reader->GetVlqInt(&indicator_value); + assert(read_res); + + cur_group_is_packed = indicator_value & 1; + cur_group_size = indicator_value >> 1; + + if (cur_group_is_packed) + { + cur_group_size *= 8; + cur_packed_bit_values.resize(cur_group_size); + bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); + } + else + { + cur_value = 0; + read_res = bit_reader->GetAligned((bit_width + 7) / 8, &cur_value); + assert(read_res); + } + cur_group_cursor = 0; + +} + +template +void RleValuesReader::visitValues( + UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor) +{ + // refer to: VisitNullBitmapInline in visitor_inline.h + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + individual_visitor(cur_packed_bit_values[i]); + } + } + else + { + repeated_visitor(cur_count, cur_value); + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableValues( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + LazyNullMap & null_map, + IndividualVisitor && individual_visitor, + RepeatedVisitor && repeated_visitor) +{ + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + individual_visitor(cursor); + } + else + { + null_map.setNull(cursor); + } + cursor++; + } + } + else + { + if (cur_value == max_def_level) + { + repeated_visitor(cursor, cur_count); + } + else + { + null_map.setNull(cursor, cur_count); + } + cursor += cur_count; + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableBySteps( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + IndividualNullVisitor && individual_null_visitor, + SteppedValidVisitor && stepped_valid_visitor, + RepeatedVisitor && repeated_visitor) +{ + // refer to: + // RleDecoder::GetBatch in rle_encoding.h and TypedColumnReaderImpl::ReadBatchSpaced in column_reader.cc + // VectorizedRleValuesReader::readBatchInternal in Spark + while (num_values > 0) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + valid_index_steps.resize(cur_count + 1); + valid_index_steps[0] = 0; + auto step_idx = 0; + auto null_map_cursor = cursor; + + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + valid_index_steps[++step_idx] = 1; + } + else + { + individual_null_visitor(null_map_cursor); + if (unlikely(valid_index_steps[step_idx] == UINT8_MAX)) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported packed values number"); + } + valid_index_steps[step_idx]++; + } + null_map_cursor++; + } + valid_index_steps.resize(step_idx + 1); + stepped_valid_visitor(cursor, 
valid_index_steps); + } + else + { + repeated_visitor(cur_value == max_def_level, cursor, cur_count); + } + + cursor += cur_count; + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter) +{ + visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + *(res_values++) = val_getter(val); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + std::fill(res_values, res_values + count, val_getter(val)); + res_values += count; + } + ); +} + +template +void RleValuesReader::setValueBySteps( + TValue * res_values, + const std::vector & col_data_steps, + ValueGetter && val_getter) +{ + auto step_iterator = col_data_steps.begin(); + res_values += *(step_iterator++); + + visitValues( + static_cast(col_data_steps.size() - 1), + /* individual_visitor */ [&](Int32 val) + { + *res_values = val_getter(val); + res_values += *(step_iterator++); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + auto getted_val = val_getter(val); + for (UInt32 i = 0; i < count; i++) + { + *res_values = getted_val; + res_values += *(step_iterator++); + } + } + ); +} + + +namespace +{ + +template +TValue * getResizedPrimitiveData(TColumn & column, size_t size) +{ + auto old_size = column.size(); + column.getData().resize(size); + memset(column.getData().data() + old_size, 0, sizeof(TValue) * (size - old_size)); + return column.getData().data(); +} + +} // anoynomous namespace + + +template <> +void ParquetPlainValuesReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + def_level_reader->visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + if (val == max_def_level) + { + plain_data_buffer.readString(column, cursor); + } + else + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + cursor++; + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + if (val == max_def_level) + { + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readString(column, cursor); + cursor++; + } + } + else + { + null_map.setNull(cursor, count); + + auto chars_size_bak = chars.size(); + chars.resize(chars_size_bak + count); + memset(&chars[chars_size_bak], 0, count); + + auto idx = cursor; + cursor += count; + for (auto val_offset = chars_size_bak; idx < cursor; idx++) + { + offset_data[idx] = ++val_offset; + } + } + } + ); +} + + +template <> +void ParquetPlainValuesReader, ParquetReaderTypes::TimestampInt96>::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast *>(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readDateTime64FromInt96(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto * col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readDateTime64FromInt96(col_data_pos[i]); + } + } + ); +} + +template +void ParquetPlainValuesReader::readBatch( + 
MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + using TValue = std::decay_t; + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readValue(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + plain_data_buffer.readBytes(column_data + nest_cursor, count * sizeof(TValue)); + } + ); +} + + +template +void ParquetFixedLenPlainReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + if constexpr (std::same_as> || std::same_as>) + { + readOverBigDecimal(col_ptr, null_map, num_values); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported type"); + } +} + +template +void ParquetFixedLenPlainReader::readOverBigDecimal( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readOverBigDecimal(column_data + nest_cursor, elem_bytes_num); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readOverBigDecimal(col_data_pos + i, elem_bytes_num); + } + } + ); +} + + +template +void ParquetRleLCReader::readBatch( + MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = index_col->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(index_col.get()), cursor + num_values); + + bool has_null = false; + + // in ColumnLowCardinality, first element in dictionary is null + // so we should increase each value by 1 in parquet index + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](size_t nest_cursor) + { + column_data[nest_cursor] = 0; + has_null = true; + }, + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + auto data_pos = column_data + nest_cursor; + std::fill(data_pos, data_pos + count, 0); + has_null = true; + } + } + ); + if (has_null) + { + null_map.setNull(0); + } +} + +template <> +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + std::vector value_cache; + + const auto & dict_chars = static_cast(page_dictionary).getChars(); + const auto & dict_offsets = static_cast(page_dictionary).getOffsets(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + auto append_nulls = [&](UInt8 num) + { + for (auto limit = cursor + num; cursor < limit; 
cursor++) + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + }; + + auto append_string = [&](Int32 dict_idx) + { + auto dict_chars_cursor = dict_offsets[dict_idx - 1]; + auto value_len = dict_offsets[dict_idx] - dict_chars_cursor; + auto chars_cursor = chars.size(); + chars.resize(chars_cursor + value_len); + + memcpySmallAllowReadWriteOverflow15(&chars[chars_cursor], &dict_chars[dict_chars_cursor], value_len); + offset_data[cursor] = chars.size(); + cursor++; + }; + + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](size_t) {}, + /* stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) + { + value_cache.resize(valid_index_steps.size()); + rle_data_reader->setValues( + value_cache.data() + 1, static_cast(valid_index_steps.size() - 1), val_getter); + + append_nulls(valid_index_steps[0]); + for (size_t i = 1; i < valid_index_steps.size(); i++) + { + append_string(value_cache[i]); + append_nulls(valid_index_steps[i] - 1); + } + }, + /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) + { + if (is_valid) + { + value_cache.resize(count); + rle_data_reader->setValues(value_cache.data(), count, val_getter); + for (UInt32 i = 0; i < count; i++) + { + append_string(value_cache[i]); + } + } + else + { + append_nulls(count); + } + } + ); +} + +template +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + const auto & dictionary_array = static_cast(page_dictionary).getData(); + + auto val_getter = [&](Int32 val) { return dictionary_array[val]; }; + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](size_t nest_cursor) + { + null_map.setNull(nest_cursor); + }, + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + null_map.setNull(nest_cursor, count); + } + } + ); +} + + +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader; + +template class ParquetFixedLenPlainReader>; +template class ParquetFixedLenPlainReader>; + +template class ParquetRleLCReader; +template class ParquetRleLCReader; +template class ParquetRleLCReader; + +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template 
class ParquetRleDictReader;
+
+}
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h
new file mode 100644
index 00000000000..fbccb612b3c
--- /dev/null
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h
@@ -0,0 +1,265 @@
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ParquetDataBuffer.h"
+
+namespace DB
+{
+
+class RleValuesReader
+{
+public:
+    RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_);
+
+    /**
+     * @brief Used when the bit_width is 0, so all elements have the same value.
+     */
+    explicit RleValuesReader(UInt32 total_size, Int32 val = 0)
+        : bit_reader(nullptr), bit_width(0), cur_group_size(total_size), cur_value(val), cur_group_is_packed(false)
+    {}
+
+    void nextGroup();
+
+    void nextGroupIfNecessary() { if (cur_group_cursor >= cur_group_size) nextGroup(); }
+
+    UInt32 curGroupLeft() const { return cur_group_size - cur_group_cursor; }
+
+    /**
+     * @brief Visit num_values elements.
+     * For RLE encoding, all values in a group are the same, so they can be visited as one repeated run.
+     * For BitPacked encoding, the values may differ from each other, so they must be visited individually.
+     *
+     * @tparam IndividualVisitor A callback with signature: void(Int32 val)
+     * @tparam RepeatedVisitor A callback with signature: void(UInt32 count, Int32 val)
+     */
+    template
+    void visitValues(UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor);
+
+    /**
+     * @brief Visit num_values elements by parsed nullability.
+     * If the parsed definition level equals max_def_level, the element is processed as a valid value,
+     * otherwise it is recorded as null.
+     *
+     * @tparam IndividualVisitor A callback with signature: void(size_t cursor)
+     * @tparam RepeatedVisitor A callback with signature: void(size_t cursor, UInt32 count)
+     *
+     * Because the null map is filled here, the callbacks only need to process the valid data.
+     */
+    template
+    void visitNullableValues(
+        size_t cursor,
+        UInt32 num_values,
+        Int32 max_def_level,
+        LazyNullMap & null_map,
+        IndividualVisitor && individual_visitor,
+        RepeatedVisitor && repeated_visitor);
+
+    /**
+     * @brief Visit num_values elements by parsed nullability.
+     * It may be inefficient to process the valid data individually like in visitNullableValues,
+     * so a valid_index_steps index array is generated first, in order to process valid data continuously.
+     *
+     * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process null value
+     * @tparam SteppedValidVisitor A callback with signature:
+     *  void(size_t cursor, const std::vector & valid_index_steps)
+     *  valid_index_steps records the gap size between two valid elements,
+     *  i-th item in valid_index_steps describes how many elements there are
+     *  from i-th valid element (inclusive) to (i+1)-th valid element (exclusive).
+     *
+     *  take the following BitPacked group of definition levels as an example, assuming max_def_level is 1:
+     *  [0,    1,     0,    0,    1,     0  ]
+     *  null   valid  null  null  valid  null
+     *  the second line shows the corresponding validation state,
+     *  then valid_index_steps has the values [1, 3, 2].
+     *  Please note that the sum of valid_index_steps equals the number of elements in this group.
+     *  TODO the definition of valid_index_steps should be updated when supporting nested types
+     *
+     * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count)
+     */
+    template
+    void visitNullableBySteps(
+        size_t cursor,
+        UInt32 num_values,
+        Int32 max_def_level,
+        IndividualNullVisitor && null_visitor,
+        SteppedValidVisitor && stepped_valid_visitor,
+        RepeatedVisitor && repeated_visitor);
+
+    /**
+     * @brief Set the values to column_data directly.
+     *
+     * @tparam TValue The type of column data.
+     * @tparam ValueGetter A callback with signature: TValue(Int32 val)
+     */
+    template
+    void setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter);
+
+    /**
+     * @brief Set the values by the valid_index_steps generated in visitNullableBySteps.
+     * As produced by visitNullableBySteps, the number of elements is valid_index_steps.size()-1,
+     * so valid_index_steps.size()-1 elements are read and written to column_data at the offsets given by valid_index_steps.
+     */
+    template
+    void setValueBySteps(
+        TValue * res_values,
+        const std::vector & col_data_steps,
+        ValueGetter && val_getter);
+
+private:
+    std::unique_ptr bit_reader;
+
+    std::vector cur_packed_bit_values;
+    std::vector valid_index_steps;
+
+    const Int32 bit_width;
+
+    UInt32 cur_group_size = 0;
+    UInt32 cur_group_cursor = 0;
+    Int32 cur_value;
+    bool cur_group_is_packed;
+};
+
+using RleValuesReaderPtr = std::unique_ptr;
+
+
+class ParquetDataValuesReader
+{
+public:
+    virtual void readBatch(MutableColumnPtr & column, LazyNullMap & null_map, UInt32 num_values) = 0;
+
+    virtual ~ParquetDataValuesReader() = default;
+};
+
+using ParquetDataValuesReaderPtr = std::unique_ptr;
+
+
+enum class ParquetReaderTypes
+{
+    Normal,
+    TimestampInt96,
+};
+
+/**
+ * The definition levels are RLE or BitPacked encoded, while the data is read as plain values.
+ */
+template
+class ParquetPlainValuesReader : public ParquetDataValuesReader
+{
+public:
+
+    ParquetPlainValuesReader(
+        Int32 max_def_level_,
+        std::unique_ptr def_level_reader_,
+        ParquetDataBuffer data_buffer_)
+        : max_def_level(max_def_level_)
+        , def_level_reader(std::move(def_level_reader_))
+        , plain_data_buffer(std::move(data_buffer_))
+    {}
+
+    void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override;
+
+private:
+    Int32 max_def_level;
+    std::unique_ptr def_level_reader;
+    ParquetDataBuffer plain_data_buffer;
+};
+
+/**
+ * The data and definition level encodings are the same as in ParquetPlainValuesReader,
+ * but the element size is fixed and larger than a primitive data type.
+ */
+template
+class ParquetFixedLenPlainReader : public ParquetDataValuesReader
+{
+public:
+
+    ParquetFixedLenPlainReader(
+        Int32 max_def_level_,
+        Int32 elem_bytes_num_,
+        std::unique_ptr def_level_reader_,
+        ParquetDataBuffer data_buffer_)
+        : max_def_level(max_def_level_)
+        , elem_bytes_num(elem_bytes_num_)
+        , def_level_reader(std::move(def_level_reader_))
+        , plain_data_buffer(std::move(data_buffer_))
+    {}
+
+    void readOverBigDecimal(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values);
+
+    void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override;
+
+private:
+    Int32 max_def_level;
+    Int32 elem_bytes_num;
+    std::unique_ptr def_level_reader;
+    ParquetDataBuffer plain_data_buffer;
+};
+
+/**
+ * Reads data in the ColumnLowCardinality format.
+ *
+ * Only the index and null columns are processed in this class.
+ * And all null value is mapped to first index in dictionary, + * so the result index valued is added by one. +*/ +template +class ParquetRleLCReader : public ParquetDataValuesReader +{ +public: + ParquetRleLCReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + std::unique_ptr rle_data_reader_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , rle_data_reader(std::move(rle_data_reader_)) + {} + + void readBatch(MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + std::unique_ptr rle_data_reader; +}; + +/** + * The definition level is RLE or BitPacked encoded, + * and the index of dictionary is also RLE or BitPacked encoded. + * + * while the result is not parsed as a low cardinality column, + * instead, a normal column is generated. + */ +template +class ParquetRleDictReader : public ParquetDataValuesReader +{ +public: + ParquetRleDictReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + std::unique_ptr rle_data_reader_, + const IColumn & page_dictionary_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , rle_data_reader(std::move(rle_data_reader_)) + , page_dictionary(page_dictionary_) + {} + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + std::unique_ptr rle_data_reader; + const IColumn & page_dictionary; +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp new file mode 100644 index 00000000000..9e1cae9bb65 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -0,0 +1,542 @@ +#include "ParquetLeafColReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +namespace +{ + +template +void visitColStrIndexType(size_t data_size, TypeVisitor && visitor) +{ + // refer to: DataTypeLowCardinality::createColumnUniqueImpl + if (data_size < (1ull << 8)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 16)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 32)) + { + visitor(static_cast(nullptr)); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported data size {}", data_size); + } +} + +void reserveColumnStrRows(MutableColumnPtr & col, UInt64 rows_num) +{ + col->reserve(rows_num); + + /// Never reserve for too big size according to SerializationString::deserializeBinaryBulk + if (rows_num < 256 * 1024 * 1024) + { + try + { + static_cast(col.get())->getChars().reserve(rows_num); + } + catch (Exception & e) + { + e.addMessage("(limit = " + toString(rows_num) + ")"); + throw; + } + } +}; + + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */); + +template <> +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto col = ColumnString::create(); + 
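+    // A PLAIN dictionary page stores each entry as a 4-byte little-endian length followed by
+    // the string payload; ParquetDataBuffer::readString() below consumes one such entry per call.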
col->getOffsets().resize(page.num_values() + 1); + col->getChars().reserve(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + + // will be read as low cardinality column + // in which case, the null key is set to first position, so the first string should be empty + col->getChars().push_back(0); + col->getOffsets()[0] = 1; + for (auto i = 1; i <= page.num_values(); i++) + { + buffer.readString(*col, i); + } + return col; +} + +template <> +ColumnPtr readDictPage>( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & data_type) +{ + + const auto & datetime_type = assert_cast(*data_type); + auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); + if (col_des.physical_type() == parquet::Type::INT64) + { + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(Int64)); + } + else + { + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readDateTime64FromInt96(col_data[i]); + } + } + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size()); + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readOverBigDecimal(col_data + i, col_des.type_length()); + } + return dict_col; +} + +template requires (!std::is_same_v) +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnDecimal::ValueType)); + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnVector::create(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnVector::ValueType)); + return dict_col; +} + + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer); + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + return std::make_unique>( + col_des.max_definition_level(), + col_des.type_length(), + std::move(def_level_reader), + std::move(buffer)); +} + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + if (std::is_same_v> && col_des.physical_type() == parquet::Type::INT96) + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); + else + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); +} + + +} // anonymous namespace + + +template 
+ParquetLeafColReader::ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_) + : col_descriptor(col_descriptor_) + , base_data_type(base_type_) + , col_chunk_meta(std::move(meta_)) + , parquet_page_reader(std::move(reader_)) + , log(&Poco::Logger::get("ParquetLeafColReader")) +{ +} + +template +ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt64 rows_num, const String & name) +{ + reading_rows_num = rows_num; + auto readPageIfEmpty = [&]() + { + while (!cur_page_values) readPage(); + }; + + // make sure the dict page has been read, and the status is updated + readPageIfEmpty(); + resetColumn(rows_num); + + while (rows_num) + { + // if dictionary page encountered, another page should be read + readPageIfEmpty(); + + auto read_values = static_cast(std::min(rows_num, static_cast(cur_page_values))); + data_values_reader->readBatch(column, *null_map, read_values); + + cur_page_values -= read_values; + rows_num -= read_values; + } + + return releaseColumn(name); +} + +template <> +void ParquetLeafColReader::resetColumn(UInt64 rows_num) +{ + if (reading_low_cardinality) + { + assert(dictionary); + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { + column = TColVec::create(); + }); + + // only first position is used + null_map = std::make_unique(1); + column->reserve(rows_num); + } + else + { + null_map = std::make_unique(rows_num); + column = ColumnString::create(); + reserveColumnStrRows(column, rows_num); + } +} + +template +void ParquetLeafColReader::resetColumn(UInt64 rows_num) +{ + assert(!reading_low_cardinality); + + column = base_data_type->createColumn(); + column->reserve(rows_num); + null_map = std::make_unique(rows_num); +} + +template +void ParquetLeafColReader::degradeDictionary() +{ + // if last batch read all dictionary indices, then degrade is not needed this time + if (!column) + { + dictionary = nullptr; + return; + } + assert(dictionary && !column->empty()); + + null_map = std::make_unique(reading_rows_num); + auto col_existing = std::move(column); + column = ColumnString::create(); + reserveColumnStrRows(column, reading_rows_num); + + ColumnString & col_dest = *static_cast(column.get()); + const ColumnString & col_dict_str = *static_cast(dictionary.get()); + + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { + const TColVec & col_src = *static_cast(col_existing.get()); + + // It will be easier to create a ColumnLowCardinality and call convertToFullColumn() on it, + // while the performance loss is ignorable, the implementation can be updated next time. 
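+        // Expand each stored dictionary index back into a full string. Index 0 is the
+        // LowCardinality null key, so it additionally marks the row as null in the null map;
+        // the referenced slice of the dictionary chars is then copied into the destination column.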
+ col_dest.getOffsets().resize(col_src.size()); + for (size_t i = 0; i < col_src.size(); i++) + { + auto src_idx = col_src.getData()[i]; + if (0 == src_idx) + { + null_map->setNull(i); + } + auto dict_chars_cursor = col_dict_str.getOffsets()[src_idx - 1]; + auto str_len = col_dict_str.getOffsets()[src_idx] - dict_chars_cursor; + auto dst_chars_cursor = col_dest.getChars().size(); + col_dest.getChars().resize(dst_chars_cursor + str_len); + + memcpySmallAllowReadWriteOverflow15( + &col_dest.getChars()[dst_chars_cursor], &col_dict_str.getChars()[dict_chars_cursor], str_len); + col_dest.getOffsets()[i] = col_dest.getChars().size(); + } + }); + dictionary = nullptr; + LOG_DEBUG(log, "degraded dictionary to normal column"); +} + +template +ColumnWithTypeAndName ParquetLeafColReader::releaseColumn(const String & name) +{ + DataTypePtr data_type = base_data_type; + if (reading_low_cardinality) + { + MutableColumnPtr col_unique; + if (null_map->getNullableCol()) + { + data_type = std::make_shared(data_type); + col_unique = ColumnUnique::create(dictionary->assumeMutable(), true); + } + else + { + col_unique = ColumnUnique::create(dictionary->assumeMutable(), false); + } + column = ColumnLowCardinality::create(std::move(col_unique), std::move(column), true); + data_type = std::make_shared(data_type); + } + else + { + if (null_map->getNullableCol()) + { + column = ColumnNullable::create(std::move(column), null_map->getNullableCol()->assumeMutable()); + data_type = std::make_shared(data_type); + } + } + ColumnWithTypeAndName res = {std::move(column), data_type, name}; + column = nullptr; + null_map = nullptr; + + return res; +} + +template +void ParquetLeafColReader::readPage() +{ + // refer to: ColumnReaderImplBase::ReadNewPage in column_reader.cc + auto cur_page = parquet_page_reader->NextPage(); + switch (cur_page->type()) + { + case parquet::PageType::DATA_PAGE: + readPageV1(*std::static_pointer_cast(cur_page)); + break; + case parquet::PageType::DATA_PAGE_V2: + readPageV2(*std::static_pointer_cast(cur_page)); + break; + case parquet::PageType::DICTIONARY_PAGE: + { + const parquet::DictionaryPage & dict_page = *std::static_pointer_cast(cur_page); + if (unlikely( + dict_page.encoding() != parquet::Encoding::PLAIN_DICTIONARY + && dict_page.encoding() != parquet::Encoding::PLAIN)) + { + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding()); + } + LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); + + dictionary = readDictPage(dict_page, col_descriptor, base_data_type); + if (unlikely(dictionary->size() < 2)) + { + // must not small than ColumnUnique::numSpecialValues() + dictionary->assumeMutable()->insertManyDefaults(2); + } + if (std::is_same_v) + { + reading_low_cardinality = true; + } + break; + } + default: + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported page type: {}", cur_page->type()); + } +} + +template +void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) +{ + static parquet::LevelDecoder repetition_level_decoder; + + cur_page_values = page.num_values(); + + // refer to: VectorizedColumnReader::readPageV1 in Spark and LevelDecoder::SetData in column_reader.cc + if (page.definition_level_encoding() != parquet::Encoding::RLE && col_descriptor.max_definition_level() != 0) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.definition_level_encoding()); + } + const auto * buffer = page.data(); + auto max_size = 
page.size(); + + if (col_descriptor.max_repetition_level() > 0) + { + auto rep_levels_bytes = repetition_level_decoder.SetData( + page.repetition_level_encoding(), col_descriptor.max_repetition_level(), 0, buffer, max_size); + buffer += rep_levels_bytes; + max_size -= rep_levels_bytes; + } + + assert(col_descriptor.max_definition_level() >= 0); + std::unique_ptr def_level_reader; + if (col_descriptor.max_definition_level() > 0) + { + auto bit_width = arrow::bit_util::Log2(col_descriptor.max_definition_level() + 1); + auto num_bytes = ::arrow::util::SafeLoadAs(buffer); + auto bit_reader = std::make_unique(buffer + 4, num_bytes); + num_bytes += 4; + buffer += num_bytes; + max_size -= num_bytes; + def_level_reader = std::make_unique(std::move(bit_reader), bit_width); + } + else + { + def_level_reader = std::make_unique(page.num_values()); + } + + switch (page.encoding()) + { + case parquet::Encoding::PLAIN: + { + if (reading_low_cardinality) + { + reading_low_cardinality = false; + degradeDictionary(); + } + + ParquetDataBuffer parquet_buffer = [&]() + { + if constexpr (!std::is_same_v, TColumn>) + return ParquetDataBuffer(buffer, max_size); + + auto scale = assert_cast(*base_data_type).getScale(); + return ParquetDataBuffer(buffer, max_size, scale); + }(); + data_values_reader = createPlainReader( + col_descriptor, std::move(def_level_reader), std::move(parquet_buffer)); + break; + } + case parquet::Encoding::RLE_DICTIONARY: + case parquet::Encoding::PLAIN_DICTIONARY: + { + if (unlikely(!dictionary)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "dictionary should be existed"); + } + + // refer to: DictDecoderImpl::SetData in encoding.cc + auto bit_width = *buffer; + auto bit_reader = std::make_unique(++buffer, --max_size); + data_values_reader = createDictReader( + std::move(def_level_reader), std::make_unique(std::move(bit_reader), bit_width)); + break; + } + case parquet::Encoding::BYTE_STREAM_SPLIT: + case parquet::Encoding::DELTA_BINARY_PACKED: + case parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY: + case parquet::Encoding::DELTA_BYTE_ARRAY: + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.encoding()); + + default: + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unknown encoding type: {}", page.encoding()); + } +} + +template +void ParquetLeafColReader::readPageV2(const parquet::DataPageV2 & /*page*/) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "read page V2 is not implemented yet"); +} + +template +std::unique_ptr ParquetLeafColReader::createDictReader( + std::unique_ptr def_level_reader, std::unique_ptr rle_data_reader) +{ + if (reading_low_cardinality && std::same_as) + { + std::unique_ptr res; + visitColStrIndexType(dictionary->size(), [&](TCol *) + { + res = std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader)); + }); + return res; + } + return std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader), + *assert_cast(dictionary.get())); +} + + +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; + +} diff --git 
a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h new file mode 100644 index 00000000000..c5b14132f17 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include + +#include "ParquetColumnReader.h" +#include "ParquetDataValuesReader.h" + +namespace parquet +{ + +class ColumnDescriptor; + +} + + +namespace DB +{ + +template +class ParquetLeafColReader : public ParquetColumnReader +{ +public: + ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_); + + ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) override; + +private: + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr base_data_type; + std::unique_ptr col_chunk_meta; + std::unique_ptr parquet_page_reader; + std::unique_ptr data_values_reader; + + MutableColumnPtr column; + std::unique_ptr null_map; + + ColumnPtr dictionary; + + UInt64 reading_rows_num = 0; + UInt32 cur_page_values = 0; + bool reading_low_cardinality = false; + + Poco::Logger * log; + + void resetColumn(UInt64 rows_num); + void degradeDictionary(); + ColumnWithTypeAndName releaseColumn(const String & name); + + void readPage(); + void readPageV1(const parquet::DataPageV1 & page); + void readPageV2(const parquet::DataPageV2 & page); + + std::unique_ptr createDictReader( + std::unique_ptr def_level_reader, std::unique_ptr rle_data_reader); +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp new file mode 100644 index 00000000000..9a15789f267 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -0,0 +1,408 @@ +#include "ParquetRecordReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ParquetLeafColReader.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int PARQUET_EXCEPTION; +} + +#define THROW_PARQUET_EXCEPTION(s) \ + do \ + { \ + try { (s); } \ + catch (const ::parquet::ParquetException & e) \ + { \ + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Parquet exception: {}", e.what()); \ + } \ + } while (false) + +namespace +{ + +std::unique_ptr createFileReader( + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + parquet::ReaderProperties reader_properties, + std::shared_ptr metadata = nullptr) +{ + std::unique_ptr res; + THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open( + std::move(arrow_file), + reader_properties, + metadata)); + return res; +} + +class ColReaderFactory +{ +public: + ColReaderFactory( + const parquet::ArrowReaderProperties & arrow_properties_, + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr ch_type_, + std::unique_ptr meta_, + std::unique_ptr page_reader_) + : arrow_properties(arrow_properties_) + , col_descriptor(col_descriptor_) + , ch_type(std::move(ch_type_)) + , meta(std::move(meta_)) + , page_reader(std::move(page_reader_)) {} + + std::unique_ptr makeReader(); + +private: + const parquet::ArrowReaderProperties & arrow_properties; + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr ch_type; + std::unique_ptr meta; + std::unique_ptr page_reader; + + + UInt32 
getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit tm_unit); + UInt32 getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit); + + std::unique_ptr fromInt32(); + std::unique_ptr fromInt64(); + std::unique_ptr fromByteArray(); + std::unique_ptr fromFLBA(); + + std::unique_ptr fromInt32INT(const parquet::IntLogicalType & int_type); + std::unique_ptr fromInt64INT(const parquet::IntLogicalType & int_type); + + template + auto makeLeafReader() + { + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(page_reader)); + } + + template + auto makeDecimalLeafReader() + { + auto data_type = std::make_shared>( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, std::move(data_type), std::move(meta), std::move(page_reader)); + } + + std::unique_ptr throwUnsupported(std::string msg = "") + { + throw Exception( + ErrorCodes::PARQUET_EXCEPTION, + "Unsupported logical type: {} and physical type: {} for field =={}=={}", + col_descriptor.logical_type()->ToString(), col_descriptor.physical_type(), col_descriptor.name(), msg); + } +}; + +UInt32 ColReaderFactory::getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit tm_unit) +{ + switch (tm_unit) + { + case parquet::LogicalType::TimeUnit::MILLIS: + return 3; + case parquet::LogicalType::TimeUnit::MICROS: + return 6; + case parquet::LogicalType::TimeUnit::NANOS: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid timestamp unit: {}", tm_unit)); + return 0; + } +} + +UInt32 ColReaderFactory::getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit) +{ + switch (tm_unit) + { + case arrow::TimeUnit::MILLI: + return 3; + case arrow::TimeUnit::MICRO: + return 6; + case arrow::TimeUnit::NANO: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid arrow time unit: {}", tm_unit)); + return 0; + } +} + +std::unique_ptr ColReaderFactory::fromInt32() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt32INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::DATE: + return makeLeafReader(); + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt64() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt64INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::TIMESTAMP: + { + const auto & tm_type = dynamic_cast(*col_descriptor.logical_type()); + auto read_type = std::make_shared(getScaleFromLogicalTimestamp(tm_type.time_unit())); + return std::make_unique>>( + col_descriptor, std::move(read_type), std::move(meta), std::move(page_reader)); + } + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromByteArray() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::STRING: + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromFLBA() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::DECIMAL: + { 
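+            // FIXED_LEN_BYTE_ARRAY decimals are stored as big-endian values of type_length() bytes;
+            // anything fitting into 16 bytes becomes Decimal128, up to 32 bytes becomes Decimal256.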
+ if (col_descriptor.type_length() > 0) + { + if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) + return makeDecimalLeafReader(); + else if (col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) + return makeDecimalLeafReader(); + } + + return throwUnsupported(PreformattedMessage::create( + ", invalid type length: {}", col_descriptor.type_length())); + } + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt32INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case 32: + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +std::unique_ptr ColReaderFactory::fromInt64INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case 64: + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +// refer: GetArrowType method in schema_internal.cc of arrow +std::unique_ptr ColReaderFactory::makeReader() +{ + // this method should to be called only once for each instance + SCOPE_EXIT({ page_reader = nullptr; }); + assert(page_reader); + + switch (col_descriptor.physical_type()) + { + case parquet::Type::BOOLEAN: + break; + case parquet::Type::INT32: + return fromInt32(); + case parquet::Type::INT64: + return fromInt64(); + case parquet::Type::INT96: + { + DataTypePtr read_type = ch_type; + if (!isDateTime64(ch_type)) + { + auto scale = getScaleFromArrowTimeUnit(arrow_properties.coerce_int96_timestamp_unit()); + read_type = std::make_shared(scale); + } + return std::make_unique>>( + col_descriptor, read_type, std::move(meta), std::move(page_reader)); + } + case parquet::Type::FLOAT: + return makeLeafReader(); + case parquet::Type::DOUBLE: + return makeLeafReader(); + case parquet::Type::BYTE_ARRAY: + return fromByteArray(); + case parquet::Type::FIXED_LEN_BYTE_ARRAY: + return fromFLBA(); + default: + break; + } + + return throwUnsupported(); +} + +} // anonymous namespace + +ParquetRecordReader::ParquetRecordReader( + Block header_, + parquet::ArrowReaderProperties arrow_properties_, + parquet::ReaderProperties reader_properties_, + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + const FormatSettings & format_settings, + std::vector row_groups_indices_, + std::shared_ptr metadata) + : file_reader(createFileReader(std::move(arrow_file), reader_properties_, std::move(metadata))) + , arrow_properties(arrow_properties_) + , header(std::move(header_)) + , max_block_size(format_settings.parquet.max_block_size) + , row_groups_indices(std::move(row_groups_indices_)) + , left_rows(getTotalRows(*file_reader->metadata())) +{ + log = &Poco::Logger::get("ParquetRecordReader"); + + std::unordered_map parquet_columns; + const auto * root = file_reader->metadata()->schema()->group_node(); + for (int i = 0; i < root->field_count(); ++i) + { + const auto & node = root->field(i); + parquet_columns.emplace(node->name(), node); + } + + parquet_col_indice.reserve(header.columns()); + column_readers.reserve(header.columns()); + for (const auto & col_with_name : header) + { + auto it = parquet_columns.find(col_with_name.name); + if (it == parquet_columns.end()) + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "no column with '{}' in parquet file", col_with_name.name); + + 
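+        // Only leaf (primitive) parquet columns can be mapped onto header columns here; nested
+        // nodes (arrays, maps, structs) would need repetition levels, which the native reader
+        // does not handle yet, hence the explicit check below.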
const auto & node = it->second; + if (!node->is_primitive()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "arrays and maps are not implemented in native parquet reader"); + + auto idx = file_reader->metadata()->schema()->ColumnIndex(*node); + chassert(idx >= 0); + parquet_col_indice.push_back(idx); + } + if (arrow_properties.pre_buffer()) + { + THROW_PARQUET_EXCEPTION(file_reader->PreBuffer( + row_groups_indices, parquet_col_indice, arrow_properties.io_context(), arrow_properties.cache_options())); + } +} + +Chunk ParquetRecordReader::readChunk() +{ + if (!left_rows) + { + return Chunk{}; + } + if (!cur_row_group_left_rows) + { + loadNextRowGroup(); + } + + Columns columns(header.columns()); + auto num_rows_read = std::min(max_block_size, cur_row_group_left_rows); + for (size_t i = 0; i < header.columns(); i++) + { + columns[i] = castColumn( + column_readers[i]->readBatch(num_rows_read, header.getByPosition(i).name), + header.getByPosition(i).type); + } + left_rows -= num_rows_read; + cur_row_group_left_rows -= num_rows_read; + + return Chunk{std::move(columns), num_rows_read}; +} + +void ParquetRecordReader::loadNextRowGroup() +{ + Stopwatch watch(CLOCK_MONOTONIC); + cur_row_group_reader = file_reader->RowGroup(row_groups_indices[next_row_group_idx]); + + column_readers.clear(); + for (size_t i = 0; i < parquet_col_indice.size(); i++) + { + ColReaderFactory factory( + arrow_properties, + *file_reader->metadata()->schema()->Column(parquet_col_indice[i]), + header.getByPosition(i).type, + cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]), + cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i])); + column_readers.emplace_back(factory.makeReader()); + } + + auto duration = watch.elapsedNanoseconds() / 1e6; + LOG_DEBUG(log, "begin to read row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration); + + ++next_row_group_idx; + cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows(); +} + +Int64 ParquetRecordReader::getTotalRows(const parquet::FileMetaData & meta_data) +{ + Int64 res = 0; + for (auto idx : row_groups_indices) + { + res += meta_data.RowGroup(idx)->num_rows(); + } + return res; +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h new file mode 100644 index 00000000000..f3b20f2d217 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "ParquetColumnReader.h" + +namespace DB +{ + +class ParquetRecordReader +{ +public: + ParquetRecordReader( + Block header_, + parquet::ArrowReaderProperties arrow_properties_, + parquet::ReaderProperties reader_properties_, + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + const FormatSettings & format_settings, + std::vector row_groups_indices_, + std::shared_ptr metadata = nullptr); + + Chunk readChunk(); + +private: + std::unique_ptr file_reader; + parquet::ArrowReaderProperties arrow_properties; + + Block header; + + std::shared_ptr cur_row_group_reader; + ParquetColReaders column_readers; + + UInt64 max_block_size; + + std::vector parquet_col_indice; + std::vector row_groups_indices; + UInt64 left_rows; + UInt64 cur_row_group_left_rows = 0; + int next_row_group_idx = 0; + + Poco::Logger * log; + + void loadNextRowGroup(); + Int64 getTotalRows(const parquet::FileMetaData & meta_data); +}; + +} diff --git 
a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp index 9b51ca0c295..ce859b38b3c 100644 --- a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -33,7 +33,7 @@ /// * `def` and `rep` arrays can be longer than `primitive_column`, because they include nulls and /// empty arrays; the values in primitive_column correspond to positions where def[i] == max_def. /// -/// If you do want to learn it, dremel paper: https://research.google/pubs/pub36632/ +/// If you do want to learn it, see dremel paper: https://research.google/pubs/pub36632/ /// Instead of reading the whole paper, try staring at figures 2-3 for a while - it might be enough. /// (Why does Parquet do all this instead of just storing array lengths and null masks? I'm not /// really sure.) @@ -430,13 +430,16 @@ void prepareColumnNullable( if (schemas[child_schema_idx].repetition_type == parq::FieldRepetitionType::REQUIRED) { - /// Normal case: we just slap a FieldRepetitionType::OPTIONAL onto the nested column. + /// Normal case: the column inside Nullable is a primitive type (not Nullable/Array/Map). + /// Just slap a FieldRepetitionType::OPTIONAL onto it. schemas[child_schema_idx].repetition_type = parq::FieldRepetitionType::OPTIONAL; } else { /// Weird case: Nullable(Nullable(...)). Or Nullable(Tuple(Nullable(...))), etc. /// This is probably not allowed in ClickHouse, but let's support it just in case. + /// The nested column already has a nontrivial repetition type, so we have to wrap it in a + /// group and assign repetition type OPTIONAL to the group. auto & schema = *schemas.insert(schemas.begin() + child_schema_idx, {}); schema.__set_repetition_type(parq::FieldRepetitionType::OPTIONAL); schema.__set_name("nullable"); diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 4d71e0102d8..b1e231d7749 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -436,7 +436,7 @@ PODArray & compress(PODArray & source, PODArray & scratch, Com size_t compressed_size; snappy::RawCompress(source.data(), source.size(), scratch.data(), &compressed_size); - scratch.resize(static_cast(compressed_size)); + scratch.resize(compressed_size); return scratch; } #endif diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index d41cb3447de..04b3a64b6cb 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_PARQUET +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -37,6 +39,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int INCORRECT_DATA; extern const int CANNOT_READ_ALL_DATA; extern const int CANNOT_PARSE_NUMBER; } @@ -45,7 +48,10 @@ namespace ErrorCodes do \ { \ if (::arrow::Status _s = (status); !_s.ok()) \ - throw Exception::createDeprecated(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ + { \ + throw Exception::createDeprecated(_s.ToString(), \ + _s.IsOutOfMemory() ? ErrorCodes::CANNOT_ALLOCATE_MEMORY : ErrorCodes::INCORRECT_DATA); \ + } \ } while (false) /// Decode min/max value from column chunk statistics. 
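As a side note to the def/rep comment in PrepareForWrite.cpp above, here is a minimal, standalone sketch of how definition levels relate to the primitive column for a flat Nullable column. It is plain standard C++ written for this note, not ClickHouse code, and the column contents are made up for illustration: present rows get def[i] == max_def and contribute a value, while null rows only contribute a definition level.

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

int main()
{
    // A flat Nullable(Int64)-like column: 1, NULL, 2.
    const std::vector<std::optional<int64_t>> column = {1, std::nullopt, 2};

    const uint8_t max_def = 1;       // one Nullable wrapper -> max definition level is 1
    std::vector<uint8_t> def;        // one definition level per row
    std::vector<int64_t> primitive;  // only non-null values are stored

    for (const auto & value : column)
    {
        def.push_back(value ? max_def : 0);
        if (value)
            primitive.push_back(*value);
    }

    // `def` is longer than `primitive`; the primitive values sit where def[i] == max_def.
    assert((def == std::vector<uint8_t>{1, 0, 1}));
    assert((primitive == std::vector<int64_t>{1, 2}));
    return 0;
}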
@@ -440,9 +446,10 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat { auto & row_group_batch = row_group_batches[row_group_batch_idx]; - parquet::ArrowReaderProperties properties; - properties.set_use_threads(false); - properties.set_batch_size(format_settings.parquet.max_block_size); + parquet::ArrowReaderProperties arrow_properties; + parquet::ReaderProperties reader_properties(ArrowMemoryPool::instance()); + arrow_properties.set_use_threads(false); + arrow_properties.set_batch_size(format_settings.parquet.max_block_size); // When reading a row group, arrow will: // 1. Look at `metadata` to get all byte ranges it'll need to read from the file (typically one @@ -460,11 +467,11 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat // // This adds one unnecessary copy. We should probably do coalescing and prefetch scheduling on // our side instead. - properties.set_pre_buffer(true); + arrow_properties.set_pre_buffer(true); auto cache_options = arrow::io::CacheOptions::LazyDefaults(); cache_options.hole_size_limit = min_bytes_for_seek; cache_options.range_size_limit = 1l << 40; // reading the whole row group at once is fine - properties.set_cache_options(cache_options); + arrow_properties.set_cache_options(cache_options); // Workaround for a workaround in the parquet library. // @@ -477,25 +484,45 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat // other, failing an assert. So we disable pre-buffering in this case. // That version is >10 years old, so this is not very important. if (metadata->writer_version().VersionLt(parquet::ApplicationVersion::PARQUET_816_FIXED_VERSION())) - properties.set_pre_buffer(false); + arrow_properties.set_pre_buffer(false); - parquet::arrow::FileReaderBuilder builder; - THROW_ARROW_NOT_OK( - builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata)); - builder.properties(properties); - // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators. 
- THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); + if (format_settings.parquet.use_native_reader) + { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native != std::endian::little) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "parquet native reader only supports little endian system currently"); +#pragma clang diagnostic pop - THROW_ARROW_NOT_OK( - row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); + row_group_batch.native_record_reader = std::make_shared( + getPort().getHeader(), + arrow_properties, + reader_properties, + arrow_file, + format_settings, + row_group_batch.row_groups_idxs); + } + else + { + parquet::arrow::FileReaderBuilder builder; + THROW_ARROW_NOT_OK(builder.Open(arrow_file, reader_properties, metadata)); + builder.properties(arrow_properties); + builder.memory_pool(ArrowMemoryPool::instance()); + THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); - row_group_batch.arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), - "Parquet", - format_settings.parquet.allow_missing_columns, - format_settings.null_as_default, - format_settings.date_time_overflow_behavior, - format_settings.parquet.case_insensitive_column_matching); + THROW_ARROW_NOT_OK( + row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); + + row_group_batch.arrow_column_to_ch_column = std::make_unique( + getPort().getHeader(), + "Parquet", + format_settings.parquet.allow_missing_columns, + format_settings.null_as_default, + format_settings.date_time_overflow_behavior, + format_settings.parquet.case_insensitive_column_matching); + } } void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_batch_idx) @@ -561,6 +588,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un lock.unlock(); auto end_of_row_group = [&] { + row_group_batch.native_record_reader.reset(); row_group_batch.arrow_column_to_ch_column.reset(); row_group_batch.record_batch_reader.reset(); row_group_batch.file_reader.reset(); @@ -573,35 +601,56 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un // reached. Wake up read() instead. 
condvar.notify_all(); }; - - if (!row_group_batch.record_batch_reader) - initializeRowGroupBatchReader(row_group_batch_idx); - - auto batch = row_group_batch.record_batch_reader->Next(); - if (!batch.ok()) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); - - if (!*batch) + auto get_pending_chunk = [&](size_t num_rows, Chunk chunk = {}) { - end_of_row_group(); - return; - } - - auto tmp_table = arrow::Table::FromRecordBatches({*batch}); - - size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * (*tmp_table)->num_rows())); - PendingChunk res = { - .chunk = {}, - .block_missing_values = {}, - .chunk_idx = row_group_batch.next_chunk_idx, - .row_group_batch_idx = row_group_batch_idx, - .approx_original_chunk_size = approx_chunk_original_size + size_t approx_chunk_original_size = static_cast(std::ceil( + static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * num_rows)); + return PendingChunk{ + .chunk = std::move(chunk), + .block_missing_values = {}, + .chunk_idx = row_group_batch.next_chunk_idx, + .row_group_batch_idx = row_group_batch_idx, + .approx_original_chunk_size = approx_chunk_original_size + }; }; - /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. - /// Otherwise fill the missing columns with zero values of its type. - BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr; - res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + if (!row_group_batch.record_batch_reader && !row_group_batch.native_record_reader) + initializeRowGroupBatchReader(row_group_batch_idx); + + PendingChunk res; + if (format_settings.parquet.use_native_reader) + { + auto chunk = row_group_batch.native_record_reader->readChunk(); + if (!chunk) + { + end_of_row_group(); + return; + } + + // TODO support defaults_for_omitted_fields feature when supporting nested columns + auto num_rows = chunk.getNumRows(); + res = get_pending_chunk(num_rows, std::move(chunk)); + } + else + { + auto batch = row_group_batch.record_batch_reader->Next(); + if (!batch.ok()) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); + + if (!*batch) + { + end_of_row_group(); + return; + } + + auto tmp_table = arrow::Table::FromRecordBatches({*batch}); + res = get_pending_chunk((*tmp_table)->num_rows()); + + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. + BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? 
&res.block_missing_values : nullptr; + res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + } lock.lock(); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index b5b884b5efa..d6591f5c0a3 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -16,6 +16,7 @@ namespace DB { class ArrowColumnToCHColumn; +class ParquetRecordReader; // Parquet files contain a metadata block with the following information: // * list of columns, @@ -177,7 +178,7 @@ private: // Paused // // If max_decoding_threads <= 1: NotStarted -> Complete. - enum class Status + enum class Status : uint8_t { NotStarted, Running, @@ -210,6 +211,9 @@ private: std::vector row_groups_idxs; // These are only used by the decoding thread, so don't require locking the mutex. + // If use_native_reader, only native_record_reader is used; + // otherwise, only native_record_reader is not used. + std::shared_ptr native_record_reader; std::unique_ptr file_reader; std::shared_ptr record_batch_reader; std::unique_ptr arrow_column_to_ch_column; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9c85dab70c4..2662232a048 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -145,11 +145,10 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more /// convenient to do the squashing here. It's also parallelized here. 
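/// To make the squashing described above concrete, a minimal standalone sketch of the
/// accumulation step follows (illustrative only, not code from this patch; the member and
/// threshold names such as min_rows/min_bytes are placeholders, the real limits come from
/// FormatSettings):
struct SquashingSketch
{
    std::vector<Chunk> staging_chunks;
    size_t staging_rows = 0;
    size_t staging_bytes = 0;
    size_t min_rows = 0;
    size_t min_bytes = 0;

    void consume(Chunk chunk)
    {
        staging_rows += chunk.getNumRows();
        staging_bytes += chunk.allocatedBytes();    /// the patch switches from bytes() to allocatedBytes()
        staging_chunks.push_back(std::move(chunk));

        if (staging_rows >= min_rows || staging_bytes >= min_bytes)
        {
            /// Write all staged chunks as one Parquet row group, then reset the staging state.
            staging_chunks.clear();
            staging_rows = staging_bytes = 0;
        }
    }
};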
- if (chunk.getNumRows() != 0) { staging_rows += chunk.getNumRows(); - staging_bytes += chunk.bytes(); + staging_bytes += chunk.allocatedBytes(); staging_chunks.push_back(std::move(chunk)); } @@ -282,11 +281,15 @@ void ParquetBlockOutputFormat::writeRowGroup(std::vector chunks) writeUsingArrow(std::move(chunks)); else { - Chunk concatenated = std::move(chunks[0]); - for (size_t i = 1; i < chunks.size(); ++i) - concatenated.append(chunks[i]); - chunks.clear(); - + Chunk concatenated; + while (!chunks.empty()) + { + if (concatenated.empty()) + concatenated.swap(chunks.back()); + else + concatenated.append(chunks.back()); + chunks.pop_back(); + } writeRowGroupInOneThread(std::move(concatenated)); } } @@ -327,7 +330,7 @@ void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) auto result = parquet::arrow::FileWriter::Open( *arrow_table->schema(), - arrow::default_memory_pool(), + ArrowMemoryPool::instance(), sink, builder.build(), writer_props_builder.build()); diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 086b5bfada2..b1dbe68579f 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace DB @@ -16,7 +18,14 @@ PrettyBlockOutputFormat::PrettyBlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, bool color_) : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()), color(color_), mono_block(mono_block_) { - readable_number_tip = header_.getColumns().size() == 1 && WhichDataType(header_.getDataTypes()[0]->getTypeId()).isNumber(); + /// Decide whether we should print a tip near the single number value in the result. + if (header_.getColumns().size() == 1) + { + /// Check if it is a numeric type, possible wrapped by Nullable or LowCardinality. + DataTypePtr type = removeNullable(recursiveRemoveLowCardinality(header_.getDataTypes().at(0))); + if (isNumber(type)) + readable_number_tip = true; + } } @@ -497,6 +506,9 @@ void PrettyBlockOutputFormat::writeReadableNumberTip(const Chunk & chunk) if (!is_single_number) return; + if (columns[0]->isNullAt(0)) + return; + auto value = columns[0]->getFloat64(0); auto threshold = format_settings.pretty.output_format_pretty_single_large_number_tip_threshold; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 4fc62afa125..a5cbb85eecd 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -13,10 +13,14 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } -RegexpFieldExtractor::RegexpFieldExtractor(const FormatSettings & format_settings) : regexp(format_settings.regexp.regexp), skip_unmatched(format_settings.regexp.skip_unmatched) +RegexpFieldExtractor::RegexpFieldExtractor(const FormatSettings & format_settings) : regexp_str(format_settings.regexp.regexp), regexp(regexp_str), skip_unmatched(format_settings.regexp.skip_unmatched) { + if (regexp_str.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The regular expression is not set for the `Regexp` format. 
It requires setting the value of the `format_regexp` setting."); + size_t fields_count = regexp.NumberOfCapturingGroups(); matched_fields.resize(fields_count); re2_arguments.resize(fields_count); @@ -58,8 +62,8 @@ bool RegexpFieldExtractor::parseRow(PeekableReadBuffer & buf) static_cast(re2_arguments_ptrs.size())); if (!match && !skip_unmatched) - throw Exception(ErrorCodes::INCORRECT_DATA, "Line \"{}\" doesn't match the regexp.", - std::string(buf.position(), line_to_match)); + throw Exception(ErrorCodes::INCORRECT_DATA, "Line \"{}\" doesn't match the regexp: `{}`", + std::string(buf.position(), line_to_match), regexp_str); buf.position() += line_size; if (!buf.eof() && !checkChar('\n', buf)) diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 7612228f8c4..8016593691f 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -31,6 +31,7 @@ public: size_t getNumberOfGroups() const { return regexp.NumberOfCapturingGroups(); } private: + String regexp_str; const re2::RE2 regexp; // The vector of fields extracted from line using regexp. std::vector matched_fields; diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 29bc0012dc0..4d67bc1a4e9 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; extern const int CANNOT_READ_ALL_DATA; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int TYPE_MISMATCH; } @@ -134,7 +135,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// If the key is not found, skip the value. NullOutput sink; - readEscapedStringInto(sink, *in); + readEscapedStringInto(sink, *in); } else { @@ -190,7 +191,16 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// Fill in the not met columns with default values. for (size_t i = 0; i < num_columns; ++i) if (!seen_columns[i]) - header.getByPosition(i).type->insertDefaultInto(*columns[i]); + { + const auto & type = header.getByPosition(i).type; + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(type)) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Cannot insert NULL value into a column `{}` of type '{}'", + header.getByPosition(i).name, + type->getName()); + type->insertDefaultInto(*columns[i]); + } /// return info about defaults set if (format_settings.defaults_for_omitted_fields) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 85b1797dab8..6d4dcba9e60 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -10,6 +10,8 @@ #include #include #include +#include +#include "Formats/FormatSettings.h" namespace DB { @@ -28,7 +30,8 @@ static void checkForCarriageReturn(ReadBuffer & in) throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." " You must transform your file to Unix format." 
- "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r."); + "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r" + "\nor else enable setting 'input_format_tsv_crlf_end_of_line'"); } TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( @@ -92,7 +95,12 @@ void TabSeparatedFormatReader::skipRowEndDelimiter() if (buf->eof()) return; - if (unlikely(first_row)) + if (format_settings.tsv.crlf_end_of_line_input) + { + if (*buf->position() == '\r') + ++buf->position(); + } + else if (unlikely(first_row)) { checkForCarriageReturn(*buf); first_row = false; @@ -105,14 +113,15 @@ template String TabSeparatedFormatReader::readFieldIntoString() { String field; + bool support_crlf = format_settings.tsv.crlf_end_of_line_input; if (is_raw) readString(field, *buf); else { if constexpr (read_string) - readEscapedString(field, *buf); + support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedString(field, *buf); else - readTSVField(field, *buf); + support_crlf ? readTSVFieldCRLF(field, *buf) : readTSVField(field, *buf); } return field; } @@ -123,7 +132,7 @@ void TabSeparatedFormatReader::skipField() if (is_raw) readStringInto(out, *buf); else - readEscapedStringInto(out, *buf); + format_settings.tsv.crlf_end_of_line_input ? readEscapedStringInto(out, *buf) : readEscapedStringInto(out, *buf); } void TabSeparatedFormatReader::skipHeaderRow() @@ -155,7 +164,7 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/) { const bool at_delimiter = !is_last_file_column && !buf->eof() && *buf->position() == '\t'; - const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n'); + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r')); if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -220,7 +229,10 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) try { - assertChar('\n', *buf); + if (!format_settings.tsv.crlf_end_of_line_input) + assertChar('\n', *buf); + else + assertChar('\r', *buf); } catch (const DB::Exception &) { @@ -233,7 +245,10 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where line feed is expected." - " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; + " It's like your file has DOS/Windows style line separators. \n" + "You must transform your file to Unix format. \n" + "But if you really need carriage return at end of string value of last column, you need to escape it as \\r \n" + "or else enable setting 'input_format_tsv_crlf_end_of_line'"; } else { @@ -348,7 +363,7 @@ void TabSeparatedFormatReader::skipRow() bool TabSeparatedFormatReader::checkForEndOfRow() { - return buf->eof() || *buf->position() == '\n'; + return buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r'); } TabSeparatedSchemaReader::TabSeparatedSchemaReader( @@ -402,6 +417,8 @@ void registerInputFormatTabSeparated(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? 
"TabSeparatedRaw" : "TabSeparated", register_func); registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); + if (is_raw) + registerWithNamesAndTypes("Raw", register_func); } } @@ -433,6 +450,8 @@ void registerTSVSchemaReader(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); + if (is_raw) + registerWithNamesAndTypes("Raw", register_func); } } @@ -506,8 +525,12 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); + if (is_raw) + registerWithNamesAndTypes("Raw", register_func); markFormatWithNamesAndTypesSupportsSamplingColumns(is_raw ? "TSVRaw" : "TSV", factory); markFormatWithNamesAndTypesSupportsSamplingColumns(is_raw ? "TabSeparatedRaw" : "TabSeparated", factory); + if (is_raw) + markFormatWithNamesAndTypesSupportsSamplingColumns("Raw", factory); } // We can use the same segmentation engine for TSKV. diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp index a4a5aea26cb..c8384c09be6 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp @@ -95,7 +95,10 @@ void registerOutputFormatTabSeparated(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); if (is_raw) + { registerWithNamesAndTypes("LineAsString", register_func); + registerWithNamesAndTypes("Raw", register_func); + } } } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 38870473289..9a7bc03ea78 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -84,7 +84,7 @@ public: void readPrefix(); void skipField(EscapingRule escaping_rule); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } + void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } template ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 353de76eea8..1493779ec2d 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -572,9 +572,16 @@ bool ValuesBlockInputFormat::checkDelimiterAfterValue(size_t column_idx) skipWhitespaceIfAny(*buf); if (likely(column_idx + 1 != num_columns)) + { return checkChar(',', *buf); + } else + { + /// Optional trailing comma. 
+ if (checkChar(',', *buf)) + skipWhitespaceIfAny(*buf); return checkChar(')', *buf); + } } bool ValuesBlockInputFormat::shouldDeduceNewTemplate(size_t column_idx) diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index f82a8c8ab64..0abafc896ff 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -49,7 +49,7 @@ private: ValuesBlockInputFormat(std::unique_ptr buf_, const Block & header_, const RowInputFormatParams & params_, const FormatSettings & format_settings_); - enum class ParserType + enum class ParserType : uint8_t { Streaming, BatchTemplate, diff --git a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp index cfaefbe663b..4852af9f0c8 100644 --- a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp @@ -21,17 +21,13 @@ VerticalRowOutputFormat::VerticalRowOutputFormat( Widths name_widths(columns); size_t max_name_width = 0; - String serialized_value; - for (size_t i = 0; i < columns; ++i) { /// Note that number of code points is just a rough approximation of visible string width. const String & name = sample.getByPosition(i).name; name_widths[i] = UTF8::computeWidth(reinterpret_cast(name.data()), name.size()); - - if (name_widths[i] > max_name_width) - max_name_width = name_widths[i]; + max_name_width = std::max(name_widths[i], max_name_width); } names_and_paddings.resize(columns); diff --git a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp index fcf338577f8..c1dd77aecaf 100644 --- a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp +++ b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp @@ -57,13 +57,11 @@ std::pair RowInputFormatWithDiagnosticInfo::getDiagnosticAndRawD max_length_of_column_name = 0; for (size_t i = 0; i < header.columns(); ++i) - if (header.safeGetByPosition(i).name.size() > max_length_of_column_name) - max_length_of_column_name = header.safeGetByPosition(i).name.size(); + max_length_of_column_name = std::max(header.safeGetByPosition(i).name.size(), max_length_of_column_name); max_length_of_data_type_name = 0; for (size_t i = 0; i < header.columns(); ++i) - if (header.safeGetByPosition(i).type->getName().size() > max_length_of_data_type_name) - max_length_of_data_type_name = header.safeGetByPosition(i).type->getName().size(); + max_length_of_data_type_name = std::max(header.safeGetByPosition(i).type->getName().size(), max_length_of_data_type_name); /// Roll back the cursor to the beginning of the previous or current row and parse all over again. But now we derive detailed information. 
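The new registerWithNamesAndTypes("Raw", register_func) calls above add Raw as an alias family for the TSVRaw behaviour. Based on how this helper is used throughout the format registration code, each call should expand to roughly the following (a sketch, not a verbatim excerpt):

register_func("Raw",                  /* with_names */ false, /* with_types */ false);
register_func("RawWithNames",         /* with_names */ true,  /* with_types */ false);
register_func("RawWithNamesAndTypes", /* with_names */ true,  /* with_types */ true);

so the plain format name and its WithNames/WithNamesAndTypes variants become available for input, output, schema inference and file segmentation, matching what is already done for TSVRaw and TabSeparatedRaw.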
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 2ad6a825c8f..ae30d741c2f 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int LOGICAL_ERROR; + extern const int TYPE_MISMATCH; } namespace @@ -124,6 +125,17 @@ void RowInputFormatWithNamesAndTypes::readPrefix() } } } + + if (format_settings.force_null_for_omitted_fields) + { + for (auto index : column_mapping->not_presented_columns) + if (!isNullableOrLowCardinalityNullable(data_types[index])) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Cannot insert NULL value into a column type '{}' at index {}", + data_types[index]->getName(), + index); + } } void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) @@ -217,7 +229,15 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE { const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; if (rem_column_index) + { + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(data_types[*rem_column_index])) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Cannot insert NULL value into a column type '{}' at index {}", + data_types[*rem_column_index]->getName(), + *rem_column_index); columns[*rem_column_index]->insertDefault(); + } ++file_column; } break; diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index ffa5b55dc76..a59eda0feb2 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -45,8 +45,13 @@ IInflatingTransform::Status IInflatingTransform::prepare() { if (input.isFinished()) { - output.finish(); - return Status::Finished; + if (is_finished) + { + output.finish(); + return Status::Finished; + } + is_finished = true; + return Status::Ready; } input.setNeeded(); @@ -73,6 +78,14 @@ void IInflatingTransform::work() generated = true; can_generate = canGenerate(); } + else if (is_finished) + { + if (can_generate || generated || has_input) + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot finish work because it has generated data or has input data"); + + current_chunk = getRemaining(); + generated = !current_chunk.empty(); + } else { if (!has_input) diff --git a/src/Processors/IInflatingTransform.h b/src/Processors/IInflatingTransform.h index 0ad12f6cd65..0cb7fc06cc4 100644 --- a/src/Processors/IInflatingTransform.h +++ b/src/Processors/IInflatingTransform.h @@ -10,13 +10,14 @@ namespace DB /// for (chunk : input_chunks) /// { /// transform.consume(chunk); -/// /// while (transform.canGenerate()) /// { /// transformed_chunk = transform.generate(); /// ... (process transformed chunk) /// } /// } +/// transformed_chunk = transform.getRemaining(); +/// ... (process remaining data) /// class IInflatingTransform : public IProcessor { @@ -32,6 +33,7 @@ protected: virtual void consume(Chunk chunk) = 0; virtual bool canGenerate() = 0; virtual Chunk generate() = 0; + virtual Chunk getRemaining() { return {}; } public: IInflatingTransform(Block input_header, Block output_header); @@ -41,6 +43,9 @@ public: InputPort & getInputPort() { return input; } OutputPort & getOutputPort() { return output; } + + /// canGenerate can flush data when input is finished. 
+ bool is_finished = false; }; } diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 8b160153733..5ab5e5277aa 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -36,8 +36,6 @@ std::string IProcessor::statusToName(Status status) case Status::ExpandPipeline: return "ExpandPipeline"; } - - UNREACHABLE(); } } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 56b4509fe00..63f32d8deb7 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -134,7 +134,7 @@ public: virtual String getName() const = 0; - enum class Status + enum class Status : uint8_t { /// Processor needs some data at its inputs to proceed. /// You need to run another processor to generate required input and then call 'prepare' again. diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 3bd0b532d90..a77bb0dabfc 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -70,34 +70,12 @@ static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns( return def; } -static MutableColumns getMergedColumns(const Block & header, const AggregatingSortedAlgorithm::ColumnsDefinition & def) -{ - MutableColumns columns; - columns.resize(header.columns()); - - for (const auto & desc : def.columns_to_simple_aggregate) - { - const auto & type = desc.nested_type ? desc.nested_type - : desc.real_type; - columns[desc.column_number] = type->createColumn(); - } - - for (size_t i = 0; i < columns.size(); ++i) - if (!columns[i]) - columns[i] = header.getByPosition(i).type->createColumn(); - - return columns; -} - /// Remove constants and LowCardinality for SimpleAggregateFunction static void preprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def) { auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - for (const auto & desc : def.columns_to_simple_aggregate) if (desc.nested_type) columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); @@ -159,12 +137,24 @@ AggregatingSortedAlgorithm::SimpleAggregateDescription::~SimpleAggregateDescript AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_), def(def_) + : MergedData(false, max_block_size_rows_, max_block_size_bytes_), def(def_) { +} + +void AggregatingSortedAlgorithm::AggregatingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs) +{ + MergedData::initialize(header, inputs); + + for (const auto & desc : def.columns_to_simple_aggregate) + { + const auto & type = desc.nested_type ? desc.nested_type + : desc.real_type; + columns[desc.column_number] = type->createColumn(); + } + initAggregateDescription(); /// Just to make startGroup() simpler. 
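A note on the MergedData refactoring that starts here and continues in the MergedData.h changes below: the merged columns are no longer created in the constructors (hence the removal of the getMergedColumns helper), because they can only be built correctly once the input chunks are known, in particular for columns with Dynamic structure. The call order after the change, reconstructed from this diff with bodies elided, is roughly:

/// Sketch of the new initialization order (names taken from the diff):
void AggregatingSortedAlgorithm::initialize(Inputs inputs)
{
    removeConstAndSparse(inputs);            /// materialize const and sparse columns up front
    merged_data.initialize(header, inputs);  /// virtual: the base MergedData clones empty columns
                                             /// from the header and lets Dynamic columns take their
                                             /// structure from the source columns; the
                                             /// AggregatingMergedData override then swaps in the
                                             /// result columns for simple aggregate functions
    /// ... preprocess chunks and initialize cursors as before ...
}

The same pattern is applied to the Summing algorithm and to the shared-chunks algorithms further down.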
@@ -267,12 +257,15 @@ AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( size_t max_block_size_bytes_) : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, description_) , columns_definition(defineColumns(header_, description_)) - , merged_data(getMergedColumns(header_, columns_definition), max_block_size_rows_, max_block_size_bytes_, columns_definition) + , merged_data(max_block_size_rows_, max_block_size_bytes_, columns_definition) { } void AggregatingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data.initialize(header, inputs); + for (auto & input : inputs) if (input.chunk) preprocessChunk(input.chunk, columns_definition); @@ -282,6 +275,7 @@ void AggregatingSortedAlgorithm::initialize(Inputs inputs) void AggregatingSortedAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); preprocessChunk(input.chunk, columns_definition); updateCursor(input, source_num); } diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index db8ee66ab2b..53c103e7038 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -102,11 +102,12 @@ private: public: AggregatingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override; + /// Group is a group of rows with the same sorting key. It represents single row in result. /// Algorithm is: start group, add several rows, finish group. /// Then pull chunk when enough groups were added. diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 8948cee217c..07ee8f4ddef 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -31,8 +31,13 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm( LoggerPtr log_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks( + header_, + num_inputs, + std::move(description_), + out_row_sources_buf_, + max_row_refs, + std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) , sign_column_number(header_.getPositionByName(sign_column)) , only_positive_sign(only_positive_sign_) , log(log_) @@ -65,7 +70,7 @@ void CollapsingSortedAlgorithm::reportIncorrectData() void CollapsingSortedAlgorithm::insertRow(RowRef & row) { - merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); + merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); } std::optional CollapsingSortedAlgorithm::insertRows() @@ -90,8 +95,8 @@ std::optional CollapsingSortedAlgorithm::insertRows() if (count_positive >= count_negative) { - if (merged_data.hasEnoughRows()) - res = merged_data.pull(); + if (merged_data->hasEnoughRows()) + res = merged_data->pull(); insertRow(last_positive_row); @@ -121,8 +126,8 @@ std::optional CollapsingSortedAlgorithm::insertRows() IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() { 
/// Rare case, which may happen when index_granularity is 1, but we needed to insert 2 rows inside insertRows(). - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` while (queue.isValid()) @@ -148,8 +153,8 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() if (key_differs) { /// if there are enough rows and the last one is calculated completely - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// We write data for the previous primary key. auto res = insertRows(); @@ -220,7 +225,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() return Status(std::move(*res)); } - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } } diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h index be1a3a3bf33..99fd95d82d9 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h @@ -42,8 +42,6 @@ public: Status merge() override; private: - MergedData merged_data; - const size_t sign_column_number; const bool only_positive_sign; diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index a5befca7233..466adf93538 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -40,6 +40,7 @@ FinishAggregatingInOrderAlgorithm::FinishAggregatingInOrderAlgorithm( void FinishAggregatingInOrderAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); current_inputs = std::move(inputs); states.resize(num_inputs); for (size_t i = 0; i < num_inputs; ++i) @@ -48,6 +49,7 @@ void FinishAggregatingInOrderAlgorithm::initialize(Inputs inputs) void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); if (!input.chunk.hasRows()) return; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 814625d7aee..2b891592b20 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -46,8 +46,8 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( size_t max_block_size_bytes_, Graphite::Params params_, time_t time_of_merge_) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), false, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs, std::make_unique(false, max_block_size_rows_, max_block_size_bytes_)) + , graphite_rollup_merged_data(assert_cast(*merged_data)) , params(std::move(params_)) , time_of_merge(time_of_merge_) { @@ -63,7 +63,7 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( } } - merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state); + 
graphite_rollup_merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state); columns_definition = defineColumns(header_, params); } @@ -113,7 +113,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() const DateLUTImpl & date_lut = timezone ? timezone->getTimeZone() : DateLUT::instance(); - /// Take rows in needed order and put them into `merged_data` until we get `max_block_size` rows. + /// Take rows in needed order and put them into `graphite_rollup_merged_data` until we get `max_block_size` rows. /// /// Variables starting with current_* refer to the rows previously popped from the queue that will /// contribute towards current output row. @@ -142,10 +142,10 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() if (is_new_key) { /// Accumulate the row that has maximum version in the previous group of rows with the same key: - if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) accumulateRow(current_subgroup_newest_row); - Graphite::RollupRule next_rule = merged_data.currentRule(); + Graphite::RollupRule next_rule = graphite_rollup_merged_data.currentRule(); if (new_path) next_rule = selectPatternForPath(this->params, next_path); @@ -167,15 +167,15 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() if (will_be_new_key) { - if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) { finishCurrentGroup(); /// We have enough rows - return, but don't advance the loop. At the beginning of the /// next call to merge() the same next_cursor will be processed once more and /// the next output row will be created from it. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (graphite_rollup_merged_data.hasEnoughRows()) + return Status(graphite_rollup_merged_data.pull()); } /// At this point previous row has been fully processed, so we can advance the loop @@ -218,28 +218,28 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() } /// Write result row for the last group. 
- if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) { accumulateRow(current_subgroup_newest_row); finishCurrentGroup(); } - return Status(merged_data.pull(), true); + return Status(graphite_rollup_merged_data.pull(), true); } void GraphiteRollupSortedAlgorithm::startNextGroup(SortCursor & cursor, Graphite::RollupRule next_rule) { - merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition); + graphite_rollup_merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition); } void GraphiteRollupSortedAlgorithm::finishCurrentGroup() { - merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition); + graphite_rollup_merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition); } void GraphiteRollupSortedAlgorithm::accumulateRow(RowRef & row) { - merged_data.accumulateRow(row, columns_definition); + graphite_rollup_merged_data.accumulateRow(row, columns_definition); } void GraphiteRollupSortedAlgorithm::GraphiteRollupMergedData::startNextGroup( diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index a20a6eaf11f..aaa3859efb6 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -53,7 +53,7 @@ public: { public: using MergedData::MergedData; - ~GraphiteRollupMergedData(); + ~GraphiteRollupMergedData() override; void startNextGroup(const ColumnRawPtrs & raw_columns, size_t row, Graphite::RollupRule next_rule, ColumnsDefinition & def); @@ -72,7 +72,7 @@ public: }; private: - GraphiteRollupMergedData merged_data; + GraphiteRollupMergedData & graphite_rollup_merged_data; const Graphite::Params params; ColumnsDefinition columns_definition; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h index 6e352c3f104..9a1c7c24270 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h @@ -39,7 +39,6 @@ public: void set(Chunk chunk_) { - convertToFullIfSparse(chunk_); chunk = std::move(chunk_); skip_last_row = false; } @@ -47,6 +46,18 @@ public: using Inputs = std::vector; + static void removeConstAndSparse(Input & input) + { + convertToFullIfConst(input.chunk); + convertToFullIfSparse(input.chunk); + } + + static void removeConstAndSparse(Inputs & inputs) + { + for (auto & input : inputs) + removeConstAndSparse(input); + } + virtual const char * getName() const = 0; virtual void initialize(Inputs inputs) = 0; virtual void consume(Input & input, size_t source_num) = 0; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h index b8e73aec0dc..cf4b8589441 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h @@ -34,9 +34,9 @@ protected: return !lhs.hasEqualSortColumnsWith(rhs); } -private: Block header; +private: /// Inputs currently being merged. 
Inputs current_inputs; SortCursorImpls cursors; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp index c8b69382e89..47b7ddf38dc 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp @@ -5,7 +5,7 @@ namespace DB { IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( - Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs) + Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_) : header(std::move(header_)) , description(std::move(description_)) , chunk_allocator(num_inputs + max_row_refs) @@ -13,28 +13,20 @@ IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( , sources(num_inputs) , sources_origin_merge_tree_part_level(num_inputs) , out_row_sources_buf(out_row_sources_buf_) + , merged_data(std::move(merged_data_)) { } -static void prepareChunk(Chunk & chunk) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - chunk.setColumns(std::move(columns), num_rows); -} - void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data->initialize(header, inputs); + for (size_t source_num = 0; source_num < inputs.size(); ++source_num) { if (!inputs[source_num].chunk) continue; - prepareChunk(inputs[source_num].chunk); - auto & source = sources[source_num]; source.skip_last_row = inputs[source_num].skip_last_row; @@ -52,7 +44,7 @@ void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) void IMergingAlgorithmWithSharedChunks::consume(Input & input, size_t source_num) { - prepareChunk(input.chunk); + removeConstAndSparse(input); auto & source = sources[source_num]; source.skip_last_row = input.skip_last_row; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h index 3b4f9e92c5d..bc1aafe93f7 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include namespace DB @@ -10,7 +11,7 @@ class IMergingAlgorithmWithSharedChunks : public IMergingAlgorithm { public: IMergingAlgorithmWithSharedChunks( - Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs); + Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_); void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; @@ -25,7 +26,6 @@ private: SortCursorImpls cursors; protected: - struct Source { detail::SharedChunkPtr chunk; @@ -43,6 +43,8 @@ protected: /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf = nullptr; + std::unique_ptr merged_data; + using RowRef = detail::RowRefWithOwnedChunk; void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, sources[cursor.impl->order].chunk); } bool skipLastRowFor(size_t input_number) const { return 
sources[input_number].skip_last_row; } diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 7ffde835ad0..c5bb074bb0c 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -1,7 +1,9 @@ #pragma once #include +#include #include +#include #include #include @@ -19,17 +21,40 @@ namespace ErrorCodes class MergedData { public: - explicit MergedData(MutableColumns columns_, bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_) - : columns(std::move(columns_)), max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_) + explicit MergedData(bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_) + : max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_) { } + virtual void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) + { + columns = header.cloneEmptyColumns(); + std::vector source_columns; + source_columns.resize(columns.size()); + for (const auto & input : inputs) + { + if (!input.chunk) + continue; + + const auto & input_columns = input.chunk.getColumns(); + for (size_t i = 0; i != input_columns.size(); ++i) + source_columns[i].push_back(input_columns[i]); + } + + for (size_t i = 0; i != columns.size(); ++i) + { + if (columns[i]->hasDynamicStructure()) + columns[i]->takeDynamicStructureFromSourceColumns(source_columns[i]); + } + } + /// Pull will be called at next prepare call. void flush() { need_flush = true; } void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size) { size_t num_columns = raw_columns.size(); + chassert(columns.size() == num_columns); for (size_t i = 0; i < num_columns; ++i) columns[i]->insertFrom(*raw_columns[i], row); @@ -41,6 +66,7 @@ public: void insertRows(const ColumnRawPtrs & raw_columns, size_t start_index, size_t length, size_t block_size) { size_t num_columns = raw_columns.size(); + chassert(columns.size() == num_columns); for (size_t i = 0; i < num_columns; ++i) { if (length == 1) @@ -61,6 +87,7 @@ public: UInt64 num_rows = chunk.getNumRows(); UInt64 num_columns = chunk.getNumColumns(); + chassert(columns.size() == num_columns); auto chunk_columns = chunk.mutateColumns(); /// Here is a special code for constant columns. @@ -69,9 +96,21 @@ public: for (size_t i = 0; i < num_columns; ++i) { if (isColumnConst(*columns[i])) + { columns[i] = columns[i]->cloneResized(num_rows); + } + /// For columns with Dynamic structure we cannot just take column from input chunk because resulting column may have + /// different Dynamic structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. 
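/// As an illustration of the comment above (the example variants are made up, not from the patch):
/// suppose that after takeDynamicStructureFromSourceColumns() the resulting Dynamic column uses
/// the shared variants {Int64, String}, while the chunk being appended came from a part whose
/// Dynamic column only contains {Int64}. Moving that chunk's column in as-is would replace the
/// merged structure and its statistics, so instead an empty column with the merged structure is
/// cloned and the rows are copied through insertRangeFrom, which converts them to that structure.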
+ else if (columns[i]->hasDynamicStructure()) + { + columns[i] = columns[i]->cloneEmpty(); + columns[i]->insertRangeFrom(*chunk_columns[i], 0, num_rows); + } else + { columns[i] = std::move(chunk_columns[i]); + } } if (rows_size < num_rows) @@ -144,6 +183,8 @@ public: UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } UInt64 maxBlockSize() const { return max_block_size; } + virtual ~MergedData() = default; + protected: MutableColumns columns; diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp index 408d9a16c31..3a9cf7ee141 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp @@ -18,7 +18,7 @@ MergingSortedAlgorithm::MergingSortedAlgorithm( WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : header(std::move(header_)) - , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size_, max_block_size_bytes_) + , merged_data(use_average_block_sizes, max_block_size_, max_block_size_bytes_) , description(description_) , limit(limit_) , out_row_sources_buf(out_row_sources_buf_) @@ -49,16 +49,16 @@ void MergingSortedAlgorithm::addInput() void MergingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data.initialize(header, inputs); current_inputs = std::move(inputs); for (size_t source_num = 0; source_num < current_inputs.size(); ++source_num) { auto & chunk = current_inputs[source_num].chunk; - if (!chunk) continue; - convertToFullIfConst(chunk); cursors[source_num] = SortCursorImpl(header, chunk.getColumns(), description, source_num); } @@ -82,7 +82,7 @@ void MergingSortedAlgorithm::initialize(Inputs inputs) void MergingSortedAlgorithm::consume(Input & input, size_t source_num) { - convertToFullIfConst(input.chunk); + removeConstAndSparse(input); current_inputs[source_num].swap(input); cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), header); diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 9e5c1249c4e..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -41,9 +41,8 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( bool use_average_block_sizes, bool cleanup_, bool enable_vertical_final_) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_) - , enable_vertical_final(enable_vertical_final_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows, max_block_size_bytes)) + , cleanup(cleanup_), enable_vertical_final(enable_vertical_final_) { if (!is_deleted_column.empty()) is_deleted_column_number = header_.getPositionByName(is_deleted_column); @@ -75,7 +74,7 @@ void ReplacingSortedAlgorithm::insertRow() to_be_emitted.push(std::move(selected_row.owned_chunk)); } else - merged_data.insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows()); + merged_data->insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows()); 
selected_row.clear(); } @@ -109,8 +108,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() if (key_differs) { /// If there are enough rows and the last one is calculated completely - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// Write the data for the previous primary key. if (!selected_row.empty()) @@ -168,8 +167,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() } /// If have enough rows, return block, because it prohibited to overflow requested number of rows. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// We will write the data for the last primary key. if (!selected_row.empty()) @@ -193,7 +192,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() return emitChunk(chunk, to_be_emitted.empty()); } - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } void ReplacingSortedAlgorithm::saveChunkForSkippingFinalFromSelectedRow() diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2fbd73c9072..a3ccccf0845 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -44,8 +44,6 @@ public: Status merge() override; private: - MergedData merged_data; - ssize_t is_deleted_column_number = -1; ssize_t version_column_number = -1; bool cleanup = false; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 79b5dae2d6e..e2c6371c44f 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -382,47 +382,11 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns( return def; } -static MutableColumns getMergedDataColumns( - const Block & header, - const SummingSortedAlgorithm::ColumnsDefinition & def) -{ - MutableColumns columns; - size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size(); - columns.reserve(num_columns); - - for (const auto & desc : def.columns_to_aggregate) - { - // Wrap aggregated columns in a tuple to match function signature - if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) - { - size_t tuple_size = desc.column_numbers.size(); - MutableColumns tuple_columns(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - tuple_columns[i] = header.safeGetByPosition(desc.column_numbers[i]).column->cloneEmpty(); - - columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); - } - else - { - const auto & type = desc.nested_type ? 
desc.nested_type : desc.real_type; - columns.emplace_back(type->createColumn()); - } - } - - for (const auto & column_number : def.column_numbers_not_to_aggregate) - columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn()); - - return columns; -} - static void preprocessChunk(Chunk & chunk, const SummingSortedAlgorithm::ColumnsDefinition & def) { auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - for (const auto & desc : def.columns_to_aggregate) { if (desc.nested_type) @@ -504,11 +468,44 @@ static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num, } -SummingSortedAlgorithm::SummingMergedData::SummingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_) +SummingSortedAlgorithm::SummingMergedData::SummingMergedData(UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) + : MergedData(false, max_block_size_rows_, max_block_size_bytes_) , def(def_) { +} + +void SummingSortedAlgorithm::SummingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs) +{ + MergedData::initialize(header, inputs); + + MutableColumns new_columns; + size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size(); + new_columns.reserve(num_columns); + + for (const auto & desc : def.columns_to_aggregate) + { + // Wrap aggregated columns in a tuple to match function signature + if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) + { + size_t tuple_size = desc.column_numbers.size(); + MutableColumns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = std::move(columns[desc.column_numbers[i]]); + + new_columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); + } + else + { + const auto & type = desc.nested_type ? 
desc.nested_type : desc.real_type; + new_columns.emplace_back(type->createColumn()); + } + } + + for (const auto & column_number : def.column_numbers_not_to_aggregate) + new_columns.emplace_back(std::move(columns[column_number])); + + columns = std::move(new_columns); + current_row.resize(def.column_names.size()); initAggregateDescription(); @@ -698,12 +695,15 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( size_t max_block_size_bytes) : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, std::move(description_)) , columns_definition(defineColumns(header_, description, column_names_to_sum, partition_key_columns)) - , merged_data(getMergedDataColumns(header_, columns_definition), max_block_size_rows, max_block_size_bytes, columns_definition) + , merged_data(max_block_size_rows, max_block_size_bytes, columns_definition) { } void SummingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); + merged_data.initialize(header, inputs); + for (auto & input : inputs) if (input.chunk) preprocessChunk(input.chunk, columns_definition); @@ -713,6 +713,7 @@ void SummingSortedAlgorithm::initialize(Inputs inputs) void SummingSortedAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); preprocessChunk(input.chunk, columns_definition); updateCursor(input, source_num); } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h index dbbe4e53a5f..664b171c4b9 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h @@ -65,7 +65,9 @@ public: using MergedData::insertRow; public: - SummingMergedData(MutableColumns columns_, UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + SummingMergedData(UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + + void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override; void startGroup(ColumnRawPtrs & raw_columns, size_t row); void finishGroup(); diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp index e7a431dc1d0..9f124c6ba18 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp @@ -16,8 +16,7 @@ VersionedCollapsingAlgorithm::VersionedCollapsingAlgorithm( size_t max_block_size_bytes_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer. 3 is a reasonable minimum size to collapse anything. 
, max_rows_in_queue(std::min(std::max(3, max_block_size_rows_), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1) , current_keys(max_rows_in_queue) @@ -47,7 +46,7 @@ void VersionedCollapsingAlgorithm::insertGap(size_t gap_size) void VersionedCollapsingAlgorithm::insertRow(size_t skip_rows, const RowRef & row) { - merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); + merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); insertGap(skip_rows); @@ -104,8 +103,8 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() --num_rows_to_insert; /// It's ok to return here, because we didn't affect queue. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); } if (current_keys.empty()) @@ -147,13 +146,13 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() insertRow(gap, row); current_keys.popFront(); - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); } /// Write information about last collapsed rows. insertGap(current_keys.frontGap()); - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } } diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h index d98529b301c..e6d20ddac75 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h @@ -29,8 +29,6 @@ public: Status merge() override; private: - MergedData merged_data; - size_t sign_column_number = 0; const size_t max_rows_in_queue; diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index f446ecec846..ae43295024a 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -59,6 +59,7 @@ public: const Aggregator::Params & getParams() const { return params; } const auto & getGroupingSetsParamsList() const { return grouping_sets_params; } + bool isGroupByUseNulls() const { return group_by_use_nulls; } bool inOrder() const { return !sort_description_for_merging.empty(); } bool explicitSortingRequired() const { return explicit_sorting_required_for_aggregation_in_order; } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index d4545482477..1f4f271fa6e 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -1,8 +1,6 @@ #include -#include #include -#include #include #include #include diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h index 50545d9ae81..f59123a7d88 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h @@ -1,17 +1,12 @@ #pragma once #include -#include #include #include -#include namespace DB { -class PreparedSets; -using PreparedSetsPtr = std::shared_ptr; - std::unique_ptr createLocalPlan( const ASTPtr & query_ast, const Block & header, diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 5062ff5479e..0ccb0c4492a 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -83,7 +83,6 
@@ void ExpressionStep::updateOutputStream() const auto & input_sort_description = getInputStreams().front().sort_description; for (size_t i = 0, s = input_sort_description.size(); i < s; ++i) { - String alias; const auto & original_column = input_sort_description[i].column_name; const auto * alias_node = alias_finder.find(original_column); if (alias_node) diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 1b9b47f46ac..56b31b2c8ba 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -113,7 +113,6 @@ void FilterStep::updateOutputStream() const auto & input_sort_description = getInputStreams().front().sort_description; for (size_t i = 0, s = input_sort_description.size(); i < s; ++i) { - String alias; const auto & original_column = input_sort_description[i].column_name; const auto * alias_node = alias_finder.find(original_column); if (alias_node) diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index ac5ea259d2e..daca88fcceb 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -32,7 +32,7 @@ public: bool has_single_port = false; /// Sorting scope. Please keep the mutual order (more strong mode should have greater value). - enum class SortScope + enum class SortScope : uint8_t { None = 0, Chunk = 1, /// Separate chunks are sorted diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp index 5e5a7387832..2e2c5ed7c1e 100644 --- a/src/Processors/QueryPlan/LimitStep.cpp +++ b/src/Processors/QueryPlan/LimitStep.cpp @@ -53,7 +53,6 @@ void LimitStep::describeActions(FormatSettings & settings) const { settings.out << prefix; - String str; if (with_ties) settings.out << "WITH TIES"; diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp index 476f5541812..a5062ac8216 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp +++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp @@ -133,8 +133,8 @@ void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, c else { auto num_merge_threads = memory_efficient_merge_threads - ? static_cast(memory_efficient_merge_threads) - : static_cast(max_threads); + ? 
memory_efficient_merge_threads + : max_threads; pipeline.addMergingAggregatedMemoryEfficientTransform(transform_params, num_merge_threads); } @@ -144,7 +144,7 @@ void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, c void MergingAggregatedStep::describeActions(FormatSettings & settings) const { - return params.explain(settings.out, settings.offset); + params.explain(settings.out, settings.offset); } void MergingAggregatedStep::describeActions(JSONBuilder::JSONMap & map) const diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 18f1496d26a..b33a373a970 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -111,8 +111,11 @@ void optimizePrimaryKeyCondition(const Stack & stack); void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes); void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); -bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections); -bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes); + +/// Returns the name of used projection or nullopt if no projection is used. +std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections); +std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes); + bool addPlansForSets(QueryPlan & plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes); /// Enable memory bound merging of aggregation states for remote queries diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 80923159ddc..2738de1ff5f 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -46,7 +46,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.optimize_projection = from.optimize_use_projections; settings.force_use_projection = settings.optimize_projection && from.force_optimize_projection; - settings.force_projection_name = from.force_optimize_projection_name; + settings.force_projection_name = settings.optimize_projection ? 
from.force_optimize_projection_name.value : ""; settings.optimize_use_implicit_projections = settings.optimize_projection && from.optimize_use_implicit_projections; return settings; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 2031b64325b..8ca240b3e8b 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -253,7 +253,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: bool has_single_clause = table_join.getClauses().size() == 1; - if (has_single_clause) + if (has_single_clause && !filled_join) { const auto & join_clause = table_join.getClauses()[0]; size_t key_names_size = join_clause.key_names_left.size(); @@ -262,10 +262,6 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: { const auto & left_table_key_name = join_clause.key_names_left[i]; const auto & right_table_key_name = join_clause.key_names_right[i]; - - if (!join_header.has(left_table_key_name) || !join_header.has(right_table_key_name)) - continue; - const auto & left_table_column = left_stream_input_header.getByName(left_table_key_name); const auto & right_table_column = right_stream_input_header.getByName(right_table_key_name); @@ -338,9 +334,9 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), filter->removesFilterColumn(), left_stream_available_columns_to_push_down, - left_stream_input_header.getColumnsWithTypeAndName(), + left_stream_input_header, right_stream_available_columns_to_push_down, - right_stream_input_header.getColumnsWithTypeAndName(), + right_stream_input_header, equivalent_columns_to_push_down, equivalent_left_stream_column_to_right_stream_column, equivalent_right_stream_column_to_left_stream_column); @@ -428,6 +424,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes /// of the grouping sets, we could not push the filter down. if (aggregating->isGroupingSets()) { + /// Cannot push down filter if type has been changed. 
+ if (aggregating->isGroupByUseNulls()) + return 0; const auto & actions = filter->getExpression(); const auto & filter_node = actions->findInOutputs(filter->getFilterColumnName()); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 8c5839a9803..fbd9b451ddc 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -83,7 +83,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, - storage.getConditionEstimatorByPredicate(source_step_with_filter->getQueryInfo(), storage_snapshot, context), + storage.getConditionEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context), queried_columns, storage.supportedPrewhereColumns(), getLogger("QueryPlanOptimizePrewhere")}; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index bc1b3695d88..c175cd516ac 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -22,6 +22,7 @@ #include #include #include +#include "Storages/KeyDescription.h" #include #include @@ -332,8 +333,7 @@ InputOrderInfoPtr buildInputOrderInfo( const FixedColumns & fixed_columns, const ActionsDAGPtr & dag, const SortDescription & description, - const ActionsDAG & sorting_key_dag, - const Names & sorting_key_columns, + const KeyDescription & sorting_key, size_t limit) { //std::cerr << "------- buildInputOrderInfo " << std::endl; @@ -343,6 +343,8 @@ InputOrderInfoPtr buildInputOrderInfo( MatchedTrees::Matches matches; FixedColumns fixed_key_columns; + const auto & sorting_key_dag = sorting_key.expression->getActionsDAG(); + if (dag) { matches = matchTrees(sorting_key_dag.getOutputs(), *dag); @@ -371,9 +373,9 @@ InputOrderInfoPtr buildInputOrderInfo( size_t next_description_column = 0; size_t next_sort_key = 0; - while (next_description_column < description.size() && next_sort_key < sorting_key_columns.size()) + while (next_description_column < description.size() && next_sort_key < sorting_key.column_names.size()) { - const auto & sorting_key_column = sorting_key_columns[next_sort_key]; + const auto & sorting_key_column = sorting_key.column_names[next_sort_key]; const auto & sort_column_description = description[next_description_column]; /// If required order depend on collation, it cannot be matched with primary key order. @@ -381,6 +383,12 @@ InputOrderInfoPtr buildInputOrderInfo( if (sort_column_description.collator) break; + /// Since sorting key columns are always sorted with NULLS LAST, reading in order + /// supported only for ASC NULLS LAST ("in order"), and DESC NULLS FIRST ("reverse") + const auto column_is_nullable = sorting_key.data_types[next_sort_key]->isNullable(); + if (column_is_nullable && sort_column_description.nulls_direction != 1) + break; + /// Direction for current sort key. 
int current_direction = 0; bool strict_monotonic = true; @@ -691,12 +699,11 @@ InputOrderInfoPtr buildInputOrderInfo( size_t limit) { const auto & sorting_key = reading->getStorageMetadata()->getSortingKey(); - const auto & sorting_key_columns = sorting_key.column_names; return buildInputOrderInfo( fixed_columns, dag, description, - sorting_key.expression->getActionsDAG(), sorting_key_columns, + sorting_key, limit); } @@ -714,15 +721,14 @@ InputOrderInfoPtr buildInputOrderInfo( { auto storage = std::get(table); const auto & sorting_key = storage->getInMemoryMetadataPtr()->getSortingKey(); - const auto & sorting_key_columns = sorting_key.column_names; - if (sorting_key_columns.empty()) + if (sorting_key.column_names.empty()) return nullptr; auto table_order_info = buildInputOrderInfo( fixed_columns, dag, description, - sorting_key.expression->getActionsDAG(), sorting_key_columns, + sorting_key, limit); if (!table_order_info) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 915e664ea8f..df9e095af30 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -12,6 +12,7 @@ namespace DB namespace ErrorCodes { + extern const int INCORRECT_DATA; extern const int TOO_MANY_QUERY_PLAN_OPTIMIZATIONS; extern const int PROJECTION_NOT_USED; } @@ -106,7 +107,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, Query void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes) { const size_t max_optimizations_to_apply = optimization_settings.max_optimizations_to_apply; - size_t num_applied_projection = 0; + std::unordered_set applied_projection_names; bool has_reading_from_mt = false; Stack stack; @@ -159,9 +160,11 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s /// Projection optimization relies on PK optimization if (optimization_settings.optimize_projection) - num_applied_projection - += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); - + { + auto applied_projection = optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); + if (applied_projection) + applied_projection_names.insert(*applied_projection); + } if (optimization_settings.aggregation_in_order) optimizeAggregationInOrder(*frame.node, nodes); @@ -180,11 +183,11 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.optimize_projection) { /// Projection optimization relies on PK optimization - if (optimizeUseNormalProjections(stack, nodes)) + if (auto applied_projection = optimizeUseNormalProjections(stack, nodes)) { - ++num_applied_projection; + applied_projection_names.insert(*applied_projection); - if (max_optimizations_to_apply && max_optimizations_to_apply < num_applied_projection) + if (max_optimizations_to_apply && max_optimizations_to_apply < applied_projection_names.size()) throw Exception(ErrorCodes::TOO_MANY_QUERY_PLAN_OPTIMIZATIONS, "Too many projection optimizations applied to query plan. 
Current limit {}", max_optimizations_to_apply); @@ -201,10 +204,16 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s stack.pop_back(); } - if (optimization_settings.force_use_projection && has_reading_from_mt && num_applied_projection == 0) + if (optimization_settings.force_use_projection && has_reading_from_mt && applied_projection_names.empty()) throw Exception( ErrorCodes::PROJECTION_NOT_USED, "No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1"); + + if (!optimization_settings.force_projection_name.empty() && has_reading_from_mt && !applied_projection_names.contains(optimization_settings.force_projection_name)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Projection {} is specified in setting force_optimize_projection_name but not used", + optimization_settings.force_projection_name); } void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 64111602458..4017670ad14 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -552,35 +552,33 @@ static QueryPlan::Node * findReadingStep(QueryPlan::Node & node) return nullptr; } -bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections) +std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections) { if (node.children.size() != 1) - return false; + return {}; auto * aggregating = typeid_cast(node.step.get()); if (!aggregating) - return false; + return {}; if (!aggregating->canUseProjection()) - return false; + return {}; QueryPlan::Node * reading_node = findReadingStep(*node.children.front()); if (!reading_node) - return false; + return {}; auto * reading = typeid_cast(reading_node->step.get()); if (!reading) - return false; + return {}; if (!canUseProjectionForReadingStep(reading)) - return false; + return {}; std::shared_ptr max_added_blocks = getMaxAddedBlocks(reading); auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks, allow_implicit_projections); - const auto & parts = reading->getParts(); - const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); const auto metadata = reading->getStorageMetadata(); ContextPtr context = reading->getContext(); @@ -592,14 +590,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & } else if (!candidates.real.empty()) { - auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); + auto ordinary_reading_select_result = reading->selectRangesToRead(); size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. 
if (ordinary_reading_marks == 0) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; @@ -633,15 +631,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & if (!best_candidate) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } } else { - return false; + return {}; } - Context::QualifiedProjectionName projection_name; chassert(best_candidate != nullptr); QueryPlanStepPtr projection_reading; @@ -656,12 +653,6 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); projection_reading = std::make_unique(std::move(pipe)); has_ordinary_parts = false; - - projection_name = Context::QualifiedProjectionName - { - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = candidates.minmax_projection->candidate.projection->name, - }; } else { @@ -693,12 +684,6 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & projection_reading = std::make_unique(std::move(pipe)); } - projection_name = Context::QualifiedProjectionName - { - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = best_candidate->projection->name, - }; - has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; if (has_ordinary_parts) reading->setAnalyzedResult(std::move(best_candidate->merge_tree_ordinary_select_result_ptr)); @@ -748,7 +733,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & node.children.push_back(&expr_or_filter_node); } - return true; + return best_candidate->projection->name; } } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index cac172a856f..728aaaa6fc4 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -73,16 +73,16 @@ static bool hasAllRequiredColumns(const ProjectionDescription * projection, cons } -bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) +std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) { const auto & frame = stack.back(); auto * reading = typeid_cast(frame.node->step.get()); if (!reading) - return false; + return {}; if (!canUseProjectionForReadingStep(reading)) - return false; + return {}; auto iter = stack.rbegin(); while (std::next(iter) != stack.rend()) @@ -96,7 +96,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) /// Dangling query plan node. This might be generated by StorageMerge. 
if (iter->node->step.get() == reading) - return false; + return {}; const auto metadata = reading->getStorageMetadata(); const auto & projections = metadata->projections; @@ -107,7 +107,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) normal_projections.push_back(&projection); if (normal_projections.empty()) - return false; + return {}; ContextPtr context = reading->getContext(); auto it = std::find_if(normal_projections.begin(), normal_projections.end(), [&](const auto * projection) @@ -126,7 +126,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) { auto & child = iter->node->children[iter->next_child - 1]; if (!query.build(*child)) - return false; + return {}; if (query.dag) query.dag->removeUnusedActions(); @@ -136,19 +136,17 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) NormalProjectionCandidate * best_candidate = nullptr; const Names & required_columns = reading->getAllColumnNames(); - const auto & parts = reading->getParts(); - const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); MergeTreeDataSelectExecutor reader(reading->getMergeTreeData()); - auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); + auto ordinary_reading_select_result = reading->selectRangesToRead(); size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. if (ordinary_reading_marks == 0) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; @@ -187,7 +185,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (!best_candidate) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } auto storage_snapshot = reading->getStorageSnapshot(); @@ -285,8 +283,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) /// Here we remove last steps from stack to be able to optimize again. /// In theory, read-in-order can be applied to projection. 
stack.resize(iter.base() - stack.begin()); - - return true; + return best_candidate->projection->name; } } diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp index 232d3118612..51df25b35f4 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp @@ -64,36 +64,61 @@ namespace return non_const_columns; } + /// build actions DAG from stack of steps + ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) + { + if (dag_stack.empty()) + return nullptr; + + ActionsDAGPtr path_actions = dag_stack.back()->clone(); + dag_stack.pop_back(); + while (!dag_stack.empty()) + { + ActionsDAGPtr clone = dag_stack.back()->clone(); + logActionsDAG("DAG to merge", clone); + dag_stack.pop_back(); + path_actions->mergeInplace(std::move(*clone)); + } + return path_actions; + } + bool compareAggregationKeysWithDistinctColumns( - const Names & aggregation_keys, const DistinctColumns & distinct_columns, const ActionsDAGPtr & path_actions) + const Names & aggregation_keys, const DistinctColumns & distinct_columns, std::vector> actions_chain) { logDebug("aggregation_keys", aggregation_keys); logDebug("aggregation_keys size", aggregation_keys.size()); logDebug("distinct_columns size", distinct_columns.size()); - std::set original_distinct_columns; - FindOriginalNodeForOutputName original_node_finder(path_actions); - for (const auto & column : distinct_columns) + std::set current_columns(begin(distinct_columns), end(distinct_columns)); + std::set source_columns; + for (auto & actions : actions_chain) { - logDebug("distinct column name", column); - const auto * alias_node = original_node_finder.find(String(column)); - if (!alias_node) + FindOriginalNodeForOutputName original_node_finder(buildActionsForPlanPath(actions)); + for (const auto & column : current_columns) { - logDebug("original name for alias is not found", column); - original_distinct_columns.insert(column); - } - else - { - logDebug("alias result name", alias_node->result_name); - original_distinct_columns.insert(alias_node->result_name); + logDebug("distinct column name", column); + const auto * alias_node = original_node_finder.find(String(column)); + if (!alias_node) + { + logDebug("original name for alias is not found", column); + source_columns.insert(String(column)); + } + else + { + logDebug("alias result name", alias_node->result_name); + source_columns.insert(alias_node->result_name); + } } + + current_columns = std::move(source_columns); + source_columns.clear(); } /// if aggregation keys are part of distinct columns then rows already distinct for (const auto & key : aggregation_keys) { - if (!original_distinct_columns.contains(key)) + if (!current_columns.contains(key)) { - logDebug("aggregation key NOT found: {}", key); + logDebug("aggregation key NOT found", key); return false; } } @@ -122,30 +147,13 @@ namespace return false; } - /// build actions DAG from stack of steps - ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) - { - if (dag_stack.empty()) - return nullptr; - - ActionsDAGPtr path_actions = dag_stack.back()->clone(); - dag_stack.pop_back(); - while (!dag_stack.empty()) - { - ActionsDAGPtr clone = dag_stack.back()->clone(); - logActionsDAG("DAG to merge", clone); - dag_stack.pop_back(); - path_actions->mergeInplace(std::move(*clone)); - } - return path_actions; - } - bool passTillAggregation(const QueryPlan::Node * 
distinct_node) { const DistinctStep * distinct_step = typeid_cast(distinct_node->step.get()); chassert(distinct_step); std::vector dag_stack; + std::vector> actions_chain; const DistinctStep * inner_distinct_step = nullptr; const IQueryPlanStep * aggregation_before_distinct = nullptr; const QueryPlan::Node * node = distinct_node; @@ -163,6 +171,16 @@ namespace break; } + if (typeid_cast(current_step)) + { + /// it can be empty in case of 2 WindowSteps following one another + if (!dag_stack.empty()) + { + actions_chain.push_back(std::move(dag_stack)); + dag_stack.clear(); + } + } + if (const auto * const expr = typeid_cast(current_step); expr) dag_stack.push_back(expr->getExpression()); else if (const auto * const filter = typeid_cast(current_step); filter) @@ -177,16 +195,22 @@ namespace if (aggregation_before_distinct) { - ActionsDAGPtr actions = buildActionsForPlanPath(dag_stack); - logActionsDAG("aggregation pass: merged DAG", actions); + if (actions_chain.empty()) + actions_chain.push_back(std::move(dag_stack)); const auto distinct_columns = getDistinctColumns(distinct_step); if (const auto * aggregating_step = typeid_cast(aggregation_before_distinct); aggregating_step) - return compareAggregationKeysWithDistinctColumns(aggregating_step->getParams().keys, distinct_columns, actions); + { + return compareAggregationKeysWithDistinctColumns( + aggregating_step->getParams().keys, distinct_columns, std::move(actions_chain)); + } else if (const auto * merging_aggregated_step = typeid_cast(aggregation_before_distinct); merging_aggregated_step) - return compareAggregationKeysWithDistinctColumns(merging_aggregated_step->getParams().keys, distinct_columns, actions); + { + return compareAggregationKeysWithDistinctColumns( + merging_aggregated_step->getParams().keys, distinct_columns, std::move(actions_chain)); + } } return false; diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 3e9e3f7ea11..ed4b1906635 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -128,15 +128,21 @@ class IndexAccess public: explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { - /// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`) - /// and we need to use the same set of index columns across all parts. + /// Indices might be reloaded during the process and the reload might produce a different value + /// (change in `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`). Also, some suffix of index + /// columns might not be loaded (same setting) so we keep a reference to the current indices and + /// track the minimal subset of loaded columns across all parts. 
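
The IndexAccess comment introduced just above (its implementation continues immediately below in the PartsSplitter hunk) argues for pinning a snapshot of every part's primary-key index and working only with the smallest set of index columns loaded across all parts. A standalone sketch of that idea, with hypothetical simplified types in place of the real data-part classes:

    #include <algorithm>
    #include <iostream>
    #include <limits>
    #include <memory>
    #include <vector>

    using Index = std::vector<int>;          // stand-in for one part's loaded primary-key columns
    using IndexPtr = std::shared_ptr<const Index>;

    struct IndexAccessSketch
    {
        explicit IndexAccessSketch(const std::vector<IndexPtr> & parts)
        {
            indices.reserve(parts.size());
            for (const auto & part_index : parts)
                indices.push_back(part_index);      // pin the current snapshot; a reload cannot swap it

            for (const auto & index : indices)
                loaded_columns = std::min(loaded_columns, index->size());
        }

        std::vector<IndexPtr> indices;
        size_t loaded_columns = std::numeric_limits<size_t>::max();
    };

    int main()
    {
        auto part1 = std::make_shared<const Index>(Index{1, 2, 3});
        auto part2 = std::make_shared<const Index>(Index{4, 5});
        IndexAccessSketch access({part1, part2});
        std::cout << access.loaded_columns << '\n';  // 2: only the common prefix of loaded columns is usable
    }
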
+ indices.reserve(parts.size()); for (const auto & part : parts) - loaded_columns = std::min(loaded_columns, part.data_part->getIndex()->size()); + indices.push_back(part.data_part->getIndex()); + + for (const auto & index : indices) + loaded_columns = std::min(loaded_columns, index->size()); } Values getValue(size_t part_idx, size_t mark) const { - const auto & index = parts[part_idx].data_part->getIndex(); + const auto & index = indices[part_idx]; chassert(index->size() >= loaded_columns); Values values(loaded_columns); for (size_t i = 0; i < loaded_columns; ++i) @@ -206,6 +212,7 @@ public: } private: const RangesInDataParts & parts; + std::vector indices; size_t loaded_columns = std::numeric_limits::max(); }; @@ -617,14 +624,11 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, } /// Process parts ranges with undefined value at end mark - bool is_intersecting = part_index_start_to_range.size() > 1; + /// The last parts ranges could be non-intersect only if: (1) there is only one part range left, (2) it belongs to a non-L0 part, + /// and (3) the begin value of this range is larger than the largest end value of all previous ranges. This is too complicated + /// to check, so we just add the last part ranges to the intersecting ranges. for (const auto & [part_range_index, mark_range] : part_index_start_to_range) - { - if (is_intersecting) - add_intersecting_range(part_range_index.part_index, mark_range); - else - add_non_intersecting_range(part_range_index.part_index, mark_range); - } + add_intersecting_range(part_range_index.part_index, mark_range); auto && non_intersecting_ranges_in_data_parts = std::move(non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); auto && intersecting_ranges_in_data_parts = std::move(intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 0fae7e8df4d..b78f7a29cde 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -520,10 +520,6 @@ void QueryPlan::explainEstimate(MutableColumns & columns) UInt64 parts = 0; UInt64 rows = 0; UInt64 marks = 0; - - EstimateCounters(const std::string & database, const std::string & table) : database_name(database), table_name(table) - { - } }; using CountersPtr = std::shared_ptr; diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index bf135ba3cd6..75c577af24e 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -7,7 +7,6 @@ #include #include -#include #include namespace DB diff --git a/src/Processors/QueryPlan/ReadFromLoopStep.cpp b/src/Processors/QueryPlan/ReadFromLoopStep.cpp new file mode 100644 index 00000000000..10436490a2a --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromLoopStep.cpp @@ -0,0 +1,156 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + namespace ErrorCodes + { + extern const int TOO_MANY_RETRIES_TO_FETCH_PARTS; + } + class PullingPipelineExecutor; + + class LoopSource : public ISource + { + public: + + LoopSource( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + ContextPtr & context_, + QueryProcessingStage::Enum processed_stage_, + StoragePtr inner_storage_, + size_t max_block_size_, + size_t num_streams_) + : 
ISource(storage_snapshot_->getSampleBlockForColumns(column_names_)) + , column_names(column_names_) + , query_info(query_info_) + , storage_snapshot(storage_snapshot_) + , processed_stage(processed_stage_) + , context(context_) + , inner_storage(std::move(inner_storage_)) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + { + } + + String getName() const override { return "Loop"; } + + Chunk generate() override + { + while (true) + { + if (!loop) + { + QueryPlan plan; + auto storage_snapshot_ = inner_storage->getStorageSnapshotForQuery(inner_storage->getInMemoryMetadataPtr(), nullptr, context); + inner_storage->read( + plan, + column_names, + storage_snapshot_, + query_info, + context, + processed_stage, + max_block_size, + num_streams); + auto builder = plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(context), + BuildQueryPipelineSettings::fromContext(context)); + QueryPlanResourceHolder resources; + auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources); + query_pipeline = QueryPipeline(std::move(pipe)); + executor = std::make_unique(query_pipeline); + loop = true; + } + Chunk chunk; + if (executor->pull(chunk)) + { + if (chunk) + { + retries_count = 0; + return chunk; + } + + } + else + { + ++retries_count; + if (retries_count > max_retries_count) + throw Exception(ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS, "Too many retries to pull from storage"); + loop = false; + executor.reset(); + query_pipeline.reset(); + } + } + } + + private: + + const Names column_names; + SelectQueryInfo query_info; + const StorageSnapshotPtr storage_snapshot; + QueryProcessingStage::Enum processed_stage; + ContextPtr context; + StoragePtr inner_storage; + size_t max_block_size; + size_t num_streams; + // add retries. 
If inner_storage failed to pull X times in a row we'd better to fail here not to hang + size_t retries_count = 0; + size_t max_retries_count = 3; + bool loop = false; + QueryPipeline query_pipeline; + std::unique_ptr executor; + }; + + ReadFromLoopStep::ReadFromLoopStep( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + QueryProcessingStage::Enum processed_stage_, + StoragePtr inner_storage_, + size_t max_block_size_, + size_t num_streams_) + : SourceStepWithFilter( + DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , column_names(column_names_) + , processed_stage(processed_stage_) + , inner_storage(std::move(inner_storage_)) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + { + } + + Pipe ReadFromLoopStep::makePipe() + { + return Pipe(std::make_shared( + column_names, query_info, storage_snapshot, context, processed_stage, inner_storage, max_block_size, num_streams)); + } + + void ReadFromLoopStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) + { + auto pipe = makePipe(); + + if (pipe.empty()) + { + assert(output_stream != std::nullopt); + pipe = Pipe(std::make_shared(output_stream->header)); + } + + pipeline.init(std::move(pipe)); + } + +} diff --git a/src/Processors/QueryPlan/ReadFromLoopStep.h b/src/Processors/QueryPlan/ReadFromLoopStep.h new file mode 100644 index 00000000000..4eee0ca5605 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromLoopStep.h @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + + class ReadFromLoopStep final : public SourceStepWithFilter + { + public: + ReadFromLoopStep( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + QueryProcessingStage::Enum processed_stage_, + StoragePtr inner_storage_, + size_t max_block_size_, + size_t num_streams_); + + String getName() const override { return "ReadFromLoop"; } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + private: + + Pipe makePipe(); + + const Names column_names; + QueryProcessingStage::Enum processed_stage; + StoragePtr inner_storage; + size_t max_block_size; + size_t num_streams; + }; +} diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 22ad53a39e0..2e7693b1b36 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -158,10 +158,7 @@ Pipe ReadFromMemoryStorageStep::makePipe() } size_t size = current_data->size(); - - if (num_streams > size) - num_streams = size; - + num_streams = std::min(num_streams, size); Pipes pipes; auto parallel_execution_index = std::make_shared>(0); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index bbd7185c440..3997f91b5e8 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -343,9 +343,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), - .count_participating_replicas = client_info.count_participating_replicas, .number_of_current_replica = 
client_info.number_of_current_replica, - .columns_to_read = required_columns, }; /// We have a special logic for local replica. It has to read less data, because in some cases it should @@ -381,7 +379,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -480,7 +478,7 @@ Pipe ReadFromMergeTree::readFromPool( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -516,9 +514,7 @@ Pipe ReadFromMergeTree::readInOrder( { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), - .count_participating_replicas = client_info.count_participating_replicas, .number_of_current_replica = client_info.number_of_current_replica, - .columns_to_read = required_columns, }; const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; @@ -592,7 +588,7 @@ Pipe ReadFromMergeTree::readInOrder( algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); @@ -1136,8 +1132,6 @@ static void addMergingFinal( return std::make_shared(header, num_outputs, sort_description, max_block_size_rows, /*max_block_size_bytes=*/0, merging_params.graphite_params, now); } - - UNREACHABLE(); }; pipe.addTransform(get_merging_processor()); @@ -1187,8 +1181,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( PartRangesReadInfo info(parts_with_ranges, settings, *data_settings); assert(num_streams == requested_num_streams); - if (num_streams > settings.max_final_threads) - num_streams = settings.max_final_threads; + num_streams = std::min(num_streams, settings.max_final_threads); /// If setting do_not_merge_across_partitions_select_final is true than we won't merge parts from different partitions. /// We have all parts in parts vector, where parts with same partition are nearby. @@ -1365,9 +1358,13 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( return merging_pipes.empty() ? 
Pipe::unitePipes(std::move(no_merging_pipes)) : Pipe::unitePipes(std::move(merging_pipes)); } +ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead() const +{ + return selectRangesToRead(prepared_parts, alter_conversions_for_parts, false /* find_exact_ranges */); +} + ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( - MergeTreeData::DataPartsVector parts, - std::vector alter_conversions) const + MergeTreeData::DataPartsVector parts, std::vector alter_conversions, bool find_exact_ranges) const { return selectRangesToRead( std::move(parts), @@ -1380,7 +1377,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( data, all_column_names, log, - indexes); + indexes, + find_exact_ranges); } static void buildIndexes( @@ -1519,25 +1517,7 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) { if (!indexes) { - /// Analyzer generates unique ColumnIdentifiers like __table1.__partition_id in filter nodes, - /// while key analysis still requires unqualified column names. - std::unordered_map node_name_to_input_node_column; - if (query_info.planner_context) - { - const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); - for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) - { - /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG, - /// so they should not be added to the input nodes. - if (alias_column_expressions.contains(column_name)) - continue; - const auto & column = table_expression_data.getColumnOrThrow(column_name); - node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); - } - } - - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, node_name_to_input_node_column); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, query_info.buildNodeNameToInputNodeColumn()); /// NOTE: Currently we store two DAGs for analysis: /// (1) SourceStepWithFilter::filter_nodes, (2) query_info.filter_actions_dag. Make sure there are consistent. 
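
In the hunk above, ReadFromMergeTree gains a parameterless selectRangesToRead() overload that forwards the stored prepared_parts and alter conversions (with find_exact_ranges = false) to the full implementation; the getAnalysisResult() hunk further down switches to it to shorten call sites. A rough, self-contained sketch of that convenience-overload shape, with simplified hypothetical names rather than the real signatures:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for ReadFromMergeTree: the no-argument overload forwards the members
    // captured at construction time, so callers no longer pass parts explicitly.
    struct ReadingStepSketch
    {
        std::vector<std::string> prepared_parts{"part_0", "part_1"};

        size_t selectRangesToRead() const
        {
            return selectRangesToRead(prepared_parts, /*find_exact_ranges=*/false);
        }

        size_t selectRangesToRead(const std::vector<std::string> & parts, bool find_exact_ranges) const
        {
            (void)find_exact_ranges;   // the real analysis honours this flag; the sketch only counts parts
            return parts.size();
        }

        size_t getAnalysisResult() const
        {
            return selectRangesToRead();   // convenience overload keeps this call site short
        }
    };

    int main()
    {
        ReadingStepSketch step;
        std::cout << step.getAnalysisResult() << '\n';   // 2
    }
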
@@ -1567,34 +1547,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const MergeTreeData & data, const Names & all_column_names, LoggerPtr log, - std::optional & indexes) -{ - return selectRangesToReadImpl( - std::move(parts), - std::move(alter_conversions), - metadata_snapshot, - query_info_, - context_, - num_streams, - max_block_numbers_to_read, - data, - all_column_names, - log, - indexes); -} - -ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( - MergeTreeData::DataPartsVector parts, - std::vector alter_conversions, - const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info_, - ContextPtr context_, - size_t num_streams, - std::shared_ptr max_block_numbers_to_read, - const MergeTreeData & data, - const Names & all_column_names, - LoggerPtr log, - std::optional & indexes) + std::optional & indexes, + bool find_exact_ranges) { AnalysisResult result; const auto & settings = context_->getSettingsRef(); @@ -1682,7 +1636,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( log, num_streams, result.index_stats, - indexes->use_skip_indexes); + indexes->use_skip_indexes, + find_exact_ranges); } size_t sum_marks_pk = total_marks_pk; @@ -1856,10 +1811,7 @@ bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const { - auto result_ptr = analyzed_result_ptr - ? analyzed_result_ptr - : selectRangesToRead(prepared_parts, alter_conversions_for_parts); - + auto result_ptr = analyzed_result_ptr ? analyzed_result_ptr : selectRangesToRead(); return *result_ptr; } @@ -2131,8 +2083,6 @@ static const char * indexTypeToString(ReadFromMergeTree::IndexType type) case ReadFromMergeTree::IndexType::Skip: return "Skip"; } - - UNREACHABLE(); } static const char * readTypeToString(ReadFromMergeTree::ReadType type) @@ -2148,8 +2098,6 @@ static const char * readTypeToString(ReadFromMergeTree::ReadType type) case ReadFromMergeTree::ReadType::ParallelReplicas: return "Parallel"; } - - UNREACHABLE(); } void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 95dc7d5b22c..243ec737456 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -60,8 +60,7 @@ struct UsefulSkipIndexes class ReadFromMergeTree final : public SourceStepWithFilter { public: - - enum class IndexType + enum class IndexType : uint8_t { None, MinMax, @@ -162,11 +161,13 @@ public: const MergeTreeData & data, const Names & all_column_names, LoggerPtr log, - std::optional & indexes); + std::optional & indexes, + bool find_exact_ranges); AnalysisResultPtr selectRangesToRead( - MergeTreeData::DataPartsVector parts, - std::vector alter_conversions) const; + MergeTreeData::DataPartsVector parts, std::vector alter_conversions, bool find_exact_ranges = false) const; + + AnalysisResultPtr selectRangesToRead() const; StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; } @@ -195,19 +196,6 @@ public: void applyFilters(ActionDAGNodes added_filter_nodes) override; private: - static AnalysisResultPtr selectRangesToReadImpl( - MergeTreeData::DataPartsVector parts, - std::vector alter_conversions, - const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, - ContextPtr context, - size_t num_streams, - std::shared_ptr 
max_block_numbers_to_read, - const MergeTreeData & data, - const Names & all_column_names, - LoggerPtr log, - std::optional & indexes); - int getSortDirection() const { if (query_info.input_order_info) diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index b845101125b..92c936cdc20 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,6 +1,5 @@ #include #include -#include #include namespace DB diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 119710d06d8..84c2515e8ca 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -360,6 +360,7 @@ void ReadFromRemote::initializePipeline(QueryPipelineBuilder & pipeline, const B ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( ASTPtr query_ast_, ClusterPtr cluster_, + const StorageID & storage_id_, ParallelReplicasReadingCoordinatorPtr coordinator_, Block header_, QueryProcessingStage::Enum stage_, @@ -372,6 +373,7 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( : ISourceStep(DataStream{.header = std::move(header_)}) , cluster(cluster_) , query_ast(query_ast_) + , storage_id(storage_id_) , coordinator(std::move(coordinator_)) , stage(std::move(stage_)) , context(context_) @@ -384,6 +386,8 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( chassert(cluster->getShardCount() == 1); std::vector description; + description.push_back(fmt::format("query: {}", formattedAST(query_ast))); + for (const auto & pool : cluster->getShardsInfo().front().per_replica_pools) description.push_back(fmt::format("Replica: {}", pool->getHost())); @@ -419,7 +423,6 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder all_replicas_count = shard.getAllNodeCount(); } - std::vector shuffled_pool; if (all_replicas_count < shard.getAllNodeCount()) { @@ -452,7 +455,6 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder processor->setStorageLimits(storage_limits); pipeline.init(std::move(pipe)); - } @@ -488,6 +490,7 @@ void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica( RemoteQueryExecutor::Extension{.parallel_reading_coordinator = coordinator, .replica_info = std::move(replica_info)}); remote_query_executor->setLogger(log); + remote_query_executor->setMainTable(storage_id); pipes.emplace_back(createRemoteSourcePipe(std::move(remote_query_executor), add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); addConvertingActions(pipes.back(), output_stream->header); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index 498d584e85a..eb15269155a 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -69,6 +69,7 @@ public: ReadFromParallelRemoteReplicasStep( ASTPtr query_ast_, ClusterPtr cluster_, + const StorageID & storage_id_, ParallelReplicasReadingCoordinatorPtr coordinator_, Block header_, QueryProcessingStage::Enum stage_, @@ -91,6 +92,7 @@ private: ClusterPtr cluster; ASTPtr query_ast; + StorageID storage_id; ParallelReplicasReadingCoordinatorPtr coordinator; QueryProcessingStage::Enum stage; ContextMutablePtr context; diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 
7a61d09bdd2..11371578c79 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -12,8 +12,8 @@ #include #include #include -#include "Core/Types.h" -#include "base/types.h" +#include + namespace DB { @@ -443,7 +443,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() /// Build rpn of query filters KeyCondition condition(filter_actions_dag, context, column_names, key_expression); - if (condition.extractPlainRanges(ranges)) { /// Intersect ranges with table range @@ -505,7 +504,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() } } - /// ranges is blank, return a source who has no data if (intersected_ranges.empty()) { diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h index cab0686474b..bc84e31be62 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h @@ -8,6 +8,7 @@ #include #include + namespace DB { @@ -43,4 +44,5 @@ private: UInt64 limit; std::shared_ptr storage_limits; }; + } diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index d0491cb4b82..8f40e523b42 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -37,6 +37,7 @@ SortingStep::Settings::Settings(const Context & context) max_bytes_before_external_sort = settings.max_bytes_before_external_sort; tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; + max_block_bytes = settings.prefer_external_sort_block_bytes; } SortingStep::Settings::Settings(size_t max_block_size_) @@ -284,6 +285,7 @@ void SortingStep::mergeSorting( header, result_sort_desc, sort_settings.max_block_size, + sort_settings.max_block_bytes, limit_, increase_sort_description_compile_attempts_current, sort_settings.max_bytes_before_remerge / pipeline.getNumStreams(), diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 52f48f66a32..49dcf9f3121 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -11,7 +11,7 @@ namespace DB class SortingStep : public ITransformingStep { public: - enum class Type + enum class Type : uint8_t { Full, FinishSorting, @@ -27,6 +27,7 @@ public: size_t max_bytes_before_external_sort = 0; TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; + size_t max_block_bytes = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index 5893c2aeb4f..ad0940b90b9 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -21,7 +21,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo { if (prewhere_info->row_level_filter) { - block = prewhere_info->row_level_filter->updateHeader(std::move(block)); + block = prewhere_info->row_level_filter->updateHeader(block); auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); if (!row_level_column.type->canBeUsedInBooleanContext()) { @@ -36,7 +36,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo if (prewhere_info->prewhere_actions) { - block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); + block = 
prewhere_info->prewhere_actions->updateHeader(block); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) @@ -80,7 +80,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo void SourceStepWithFilter::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, query_info.buildNodeNameToInputNodeColumn()); } void SourceStepWithFilter::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index d1bd70fd0b2..ac5e144bf4a 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -86,8 +86,6 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr case TotalsMode::AFTER_HAVING_AUTO: return "after_having_auto threshold " + std::to_string(auto_include_threshold); } - - UNREACHABLE(); } void TotalsHavingStep::describeActions(FormatSettings & settings) const diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index b60eab78b53..a81bc7bb1a9 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -8,7 +8,7 @@ namespace DB class ActionsDAG; using ActionsDAGPtr = std::shared_ptr; -enum class TotalsMode; +enum class TotalsMode : uint8_t; /// Execute HAVING and calculate totals. See TotalsHavingTransform. class TotalsHavingStep : public ITransformingStep diff --git a/src/Processors/ResizeProcessor.h b/src/Processors/ResizeProcessor.h index 61e35c54364..8d26387c039 100644 --- a/src/Processors/ResizeProcessor.h +++ b/src/Processors/ResizeProcessor.h @@ -45,14 +45,14 @@ private: bool initialized = false; bool is_reading_started = false; - enum class OutputStatus + enum class OutputStatus : uint8_t { NotActive, NeedData, Finished, }; - enum class InputStatus + enum class InputStatus : uint8_t { NotActive, HasData, @@ -107,14 +107,14 @@ private: std::queue waiting_outputs; bool initialized = false; - enum class OutputStatus + enum class OutputStatus : uint8_t { NotActive, NeedData, Finished, }; - enum class InputStatus + enum class InputStatus : uint8_t { NotActive, NeedData, diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index be691fd5b2e..985a82a7b17 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -241,8 +241,7 @@ namespace ReadBufferFromString in(value); time_t time = 0; readDateTimeText(time, in, assert_cast(data_type).getTimeZone()); - if (time < 0) - time = 0; + time = std::max(time, 0); assert_cast(column).insertValue(static_cast(time)); read_bytes_size += 4; break; @@ -275,7 +274,6 @@ namespace /// 8 bytes for double-precision X coordinate /// 8 bytes for double-precision Y coordinate ReadBufferFromMemory payload(value.data(), value.size()); - String val; payload.ignore(4); UInt8 endian; diff --git a/src/Processors/Sources/RecursiveCTESource.cpp b/src/Processors/Sources/RecursiveCTESource.cpp index b94cb188086..93503b45aaf 100644 --- a/src/Processors/Sources/RecursiveCTESource.cpp +++ b/src/Processors/Sources/RecursiveCTESource.cpp @@ -102,6 +102,7 @@ public: "Recursive CTE subquery {}. 
Expected projection columns to have same size in recursive and non recursive subquery.", recursive_cte_union_node->formatASTForErrorMessage()); + working_temporary_table_holder = recursive_cte_table->holder; working_temporary_table_storage = recursive_cte_table->storage; intermediate_temporary_table_holder = std::make_shared( @@ -147,6 +148,7 @@ public: truncateTemporaryTable(working_temporary_table_storage); + std::swap(intermediate_temporary_table_holder, working_temporary_table_holder); std::swap(intermediate_temporary_table_storage, working_temporary_table_storage); } @@ -172,6 +174,9 @@ private: SelectQueryOptions select_query_options; select_query_options.merge_tree_enable_remove_parts_from_snapshot_optimization = false; + const auto & recursive_table_name = recursive_cte_union_node->as().getCTEName(); + recursive_query_context->addOrUpdateExternalTable(recursive_table_name, working_temporary_table_holder); + auto interpreter = std::make_unique(query_to_execute, recursive_query_context, select_query_options); auto pipeline_builder = interpreter->buildQueryPipeline(); @@ -225,6 +230,7 @@ private: QueryTreeNodePtr recursive_query; ContextMutablePtr recursive_query_context; + TemporaryTableHolderPtr working_temporary_table_holder; StoragePtr working_temporary_table_storage; TemporaryTableHolderPtr intermediate_temporary_table_holder; diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h index b6aee6f7cb0..52cd1509567 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h @@ -5,7 +5,7 @@ namespace DB { -enum class TTLUpdateField +enum class TTLUpdateField : uint8_t { COLUMNS_TTL, TABLE_TTL, diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index 3a6595ea4fb..e43aa6028da 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -57,7 +57,7 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) auto result_column = res_column.column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); - if (const auto * column_nullable = checkAndGetColumn(*result_column)) + if (const auto * column_nullable = checkAndGetColumn(&*result_column)) { const auto & nested_column = column_nullable->getNestedColumnPtr(); diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index b2e8e9bc89e..15f8355bdc7 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -20,11 +21,13 @@ ColumnGathererStream::ColumnGathererStream( size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_) + size_t block_preferred_size_bytes_, + bool is_result_sparse_) : sources(num_inputs) , row_sources_buf(row_sources_buf_) , block_preferred_size_rows(block_preferred_size_rows_) , block_preferred_size_bytes(block_preferred_size_bytes_) + , is_result_sparse(is_result_sparse_) { if (num_inputs == 0) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather"); @@ -32,15 +35,29 @@ ColumnGathererStream::ColumnGathererStream( void ColumnGathererStream::initialize(Inputs inputs) { + Columns source_columns; + source_columns.reserve(inputs.size()); for (size_t i = 0; i < inputs.size(); 
++i) { - if (inputs[i].chunk) - { - sources[i].update(inputs[i].chunk.detachColumns().at(0)); - if (!result_column) - result_column = sources[i].column->cloneEmpty(); - } + if (!inputs[i].chunk) + continue; + + if (!is_result_sparse) + convertToFullIfSparse(inputs[i].chunk); + + sources[i].update(inputs[i].chunk.detachColumns().at(0)); + source_columns.push_back(sources[i].column); } + + if (source_columns.empty()) + return; + + result_column = source_columns[0]->cloneEmpty(); + if (is_result_sparse && !result_column->isSparse()) + result_column = ColumnSparse::create(std::move(result_column)); + + if (result_column->hasDynamicStructure()) + result_column->takeDynamicStructureFromSourceColumns(source_columns); } IMergingAlgorithm::Status ColumnGathererStream::merge() @@ -52,7 +69,19 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() if (source_to_fully_copy) /// Was set on a previous iteration { Chunk res; - res.addColumn(source_to_fully_copy->column); + /// For columns with Dynamic structure we cannot just take column source_to_fully_copy because resulting column may have + /// different Dynamic structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. + if (result_column->hasDynamicStructure()) + { + auto col = result_column->cloneEmpty(); + col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size()); + res.addColumn(std::move(col)); + } + else + { + res.addColumn(source_to_fully_copy->column); + } merged_rows += source_to_fully_copy->size; source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; @@ -96,7 +125,16 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() Chunk res; merged_rows += source_to_fully_copy->column->size(); merged_bytes += source_to_fully_copy->column->allocatedBytes(); - res.addColumn(source_to_fully_copy->column); + if (result_column->hasDynamicStructure()) + { + auto col = result_column->cloneEmpty(); + col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size()); + res.addColumn(std::move(col)); + } + else + { + res.addColumn(source_to_fully_copy->column); + } source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; return Status(std::move(res)); @@ -117,7 +155,12 @@ void ColumnGathererStream::consume(Input & input, size_t source_num) { auto & source = sources[source_num]; if (input.chunk) + { + if (!is_result_sparse) + convertToFullIfSparse(input.chunk); + source.update(input.chunk.getColumns().at(0)); + } if (0 == source.size) { @@ -130,10 +173,11 @@ ColumnGathererTransform::ColumnGathererTransform( size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_) + size_t block_preferred_size_bytes_, + bool is_result_sparse_) : IMergingTransform( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, - num_inputs, row_sources_buf_, block_preferred_size_rows_, block_preferred_size_bytes_) + num_inputs, row_sources_buf_, block_preferred_size_rows_, block_preferred_size_bytes_, is_result_sparse_) , log(getLogger("ColumnGathererStream")) { if (header.columns() != 1) diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index 4e56cffa46a..ec5691316ce 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ 
b/src/Processors/Transforms/ColumnGathererTransform.h @@ -60,7 +60,8 @@ public: size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_); + size_t block_preferred_size_bytes_, + bool is_result_sparse_); const char * getName() const override { return "ColumnGathererStream"; } void initialize(Inputs inputs) override; @@ -97,6 +98,7 @@ private: const size_t block_preferred_size_rows; const size_t block_preferred_size_bytes; + const bool is_result_sparse; Source * source_to_fully_copy = nullptr; @@ -113,7 +115,8 @@ public: size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_); + size_t block_preferred_size_bytes_, + bool is_result_sparse_); String getName() const override { return "ColumnGathererTransform"; } @@ -145,7 +148,6 @@ void ColumnGathererStream::gather(Column & column_res) next_required_source = -1; - /// We use do ... while here to ensure there will be at least one iteration of this loop. /// Because the column_res.byteSize() could be bigger than block_preferred_size_bytes already at this point. do diff --git a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h index 0f5dab06fc9..8e15b04f37e 100644 --- a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h +++ b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.h @@ -15,7 +15,7 @@ struct SetWithState : public Set using Set::Set; /// Flow: Creating -> Finished or Suspended - enum class State + enum class State : uint8_t { /// Set is not yet created, /// Creating processor continues to build set. diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp index 04741a6f231..281afbd7949 100644 --- a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp @@ -154,8 +154,7 @@ size_t DistinctSortedChunkTransform::getRangeEnd(size_t begin, size_t end, Predi const size_t linear_probe_threadhold = 16; size_t linear_probe_end = begin + linear_probe_threadhold; - if (linear_probe_end > end) - linear_probe_end = end; + linear_probe_end = std::min(linear_probe_end, end); for (size_t pos = begin; pos < linear_probe_end; ++pos) { diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.h b/src/Processors/Transforms/ExceptionKeepingTransform.h index cec0e0eea31..000b5da798a 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.h +++ b/src/Processors/Transforms/ExceptionKeepingTransform.h @@ -28,7 +28,7 @@ protected: OutputPort & output; Port::Data data; - enum class Stage + enum class Stage : uint8_t { Start, Consume, diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 0d3341b000c..2fbd2c21b8d 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -3,9 +3,9 @@ namespace DB { -Block ExpressionTransform::transformHeader(Block header, const ActionsDAG & expression) +Block ExpressionTransform::transformHeader(const Block & header, const ActionsDAG & expression) { - return expression.updateHeader(std::move(header)); + return expression.updateHeader(header); } diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index 791c7d7ba73..cd2aae044d5 100644 --- 
a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -24,7 +24,7 @@ public: String getName() const override { return "ExpressionTransform"; } - static Block transformHeader(Block header, const ActionsDAG & expression); + static Block transformHeader(const Block & header, const ActionsDAG & expression); protected: void transform(Chunk & chunk) override; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 879d61bb241..bb38c3e1dc5 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -67,7 +67,6 @@ static FillColumnDescription::StepFunction getStepFunction( FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE } - UNREACHABLE(); } static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type) @@ -545,8 +544,7 @@ size_t getRangeEnd(size_t begin, size_t end, Predicate pred) const size_t linear_probe_threadhold = 16; size_t linear_probe_end = begin + linear_probe_threadhold; - if (linear_probe_end > end) - linear_probe_end = end; + linear_probe_end = std::min(linear_probe_end, end); for (size_t pos = begin; pos < linear_probe_end; ++pos) { diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index b3be9246f43..e8e7f99ce53 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; + extern const int LOGICAL_ERROR; } static void replaceFilterToConstant(Block & block, const String & filter_column_name) @@ -81,7 +82,11 @@ static std::unique_ptr combineFilterAndIndices( auto mutable_holder = ColumnUInt8::create(num_rows, 0); auto & data = mutable_holder->getData(); for (auto idx : selected_by_indices) + { + if (idx >= num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index {} out of range {}", idx, num_rows); data[idx] = 1; + } /// AND two filters auto * begin = data.data(); @@ -174,26 +179,22 @@ static std::unique_ptr combineFilterAndIndices( } Block FilterTransform::transformHeader( - Block header, - const ActionsDAG * expression, - const String & filter_column_name, - bool remove_filter_column) + const Block & header, const ActionsDAG * expression, const String & filter_column_name, bool remove_filter_column) { - if (expression) - header = expression->updateHeader(std::move(header)); + Block result = expression ? expression->updateHeader(header) : header; - auto filter_type = header.getByName(filter_column_name).type; + auto filter_type = result.getByName(filter_column_name).type; if (!filter_type->onlyNull() && !isUInt8(removeNullable(removeLowCardinality(filter_type)))) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Illegal type {} of column {} for filter. 
Must be UInt8 or Nullable(UInt8).", filter_type->getName(), filter_column_name); if (remove_filter_column) - header.erase(filter_column_name); + result.erase(filter_column_name); else - replaceFilterToConstant(header, filter_column_name); + replaceFilterToConstant(result, filter_column_name); - return header; + return result; } FilterTransform::FilterTransform( diff --git a/src/Processors/Transforms/FilterTransform.h b/src/Processors/Transforms/FilterTransform.h index bb72b72d619..23c694eed0b 100644 --- a/src/Processors/Transforms/FilterTransform.h +++ b/src/Processors/Transforms/FilterTransform.h @@ -22,11 +22,8 @@ public: const Block & header_, ExpressionActionsPtr expression_, String filter_column_name_, bool remove_filter_column_, bool on_totals_ = false, std::shared_ptr> rows_filtered_ = nullptr); - static Block transformHeader( - Block header, - const ActionsDAG * expression, - const String & filter_column_name, - bool remove_filter_column); + static Block + transformHeader(const Block & header, const ActionsDAG * expression, const String & filter_column_name, bool remove_filter_column); String getName() const override { return "FilterTransform"; } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 0c0a86ce270..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -14,12 +14,12 @@ namespace ErrorCodes Block JoiningTransform::transformHeader(Block header, const JoinPtr & join) { - LOG_DEBUG(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); + LOG_TEST(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); join->checkTypesOfKeys(header); join->initialize(header); ExtraBlockPtr tmp; join->joinBlock(header, tmp); - LOG_DEBUG(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); + LOG_TEST(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); return header; } diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 584125b046f..159a3244fe9 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -48,8 +48,8 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, { if constexpr (has_left_nulls && has_right_nulls) { - const auto * left_nullable = checkAndGetColumn(left_column); - const auto * right_nullable = checkAndGetColumn(right_column); + const auto * left_nullable = checkAndGetColumn(&left_column); + const auto * right_nullable = checkAndGetColumn(&right_column); if (left_nullable && right_nullable) { @@ -67,7 +67,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if constexpr (has_left_nulls) { - if (const auto * left_nullable = checkAndGetColumn(left_column)) + if (const auto * left_nullable = checkAndGetColumn(&left_column)) { if (left_nullable->isNullAt(lhs_pos)) return null_direction_hint; @@ -77,7 +77,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if constexpr (has_right_nulls) { - if (const auto * right_nullable = checkAndGetColumn(right_column)) + if (const auto * right_nullable = checkAndGetColumn(&right_column)) { if (right_nullable->isNullAt(rhs_pos)) return -null_direction_hint; @@ -338,8 +338,6 @@ static void prepareChunk(Chunk & chunk) void MergeJoinAlgorithm::initialize(Inputs inputs) { - 
LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, 0, inputs[0].chunk.dumpStructure()); - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, 1, inputs[1].chunk.dumpStructure()); if (inputs.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "Two inputs are required, got {}", inputs.size()); @@ -351,8 +349,6 @@ void MergeJoinAlgorithm::initialize(Inputs inputs) void MergeJoinAlgorithm::consume(Input & input, size_t source_num) { - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, source_num, input.chunk.dumpStructure()); - if (input.skip_last_row) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "skip_last_row is not supported"); @@ -816,15 +812,9 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted()) return Status(1); - for (size_t i = 0; i < 2; ++i) - { - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: sampleColumns {} '{}'", __FILE__, __LINE__, i, cursors[i]->sampleBlock().dumpStructure()); - } - if (auto result = handleAllJoinState()) { - LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: '{}'", __FILE__, __LINE__, result ? result->chunk.dumpStructure() : "NA"); return std::move(*result); } diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index 64d84ea4b00..ede13b29219 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -80,6 +80,7 @@ MergeSortingTransform::MergeSortingTransform( const Block & header, const SortDescription & description_, size_t max_merged_block_size_, + size_t max_block_bytes_, UInt64 limit_, bool increase_sort_description_compile_attempts, size_t max_bytes_before_remerge_, @@ -93,6 +94,7 @@ MergeSortingTransform::MergeSortingTransform( , max_bytes_before_external_sort(max_bytes_before_external_sort_) , tmp_data(std::move(tmp_data_)) , min_free_disk_space(min_free_disk_space_) + , max_block_bytes(max_block_bytes_) { } @@ -169,7 +171,13 @@ void MergeSortingTransform::consume(Chunk chunk) /// If there's less free disk space than reserve_size, an exception will be thrown size_t reserve_size = sum_bytes_in_blocks + min_free_disk_space; auto & tmp_stream = tmp_data->createStream(header_without_constants, reserve_size); - + size_t max_merged_block_size = this->max_merged_block_size; + if (max_block_bytes > 0 && sum_rows_in_blocks > 0 && sum_bytes_in_blocks > 0) + { + auto avg_row_bytes = sum_bytes_in_blocks / sum_rows_in_blocks; + /// max_merged_block_size >= 128 + max_merged_block_size = std::max(std::min(max_merged_block_size, max_block_bytes / avg_row_bytes), 128UL); + } merge_sorter = std::make_unique(header_without_constants, std::move(chunks), description, max_merged_block_size, limit); auto current_processor = std::make_shared(header_without_constants, tmp_stream, log); diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index 4478d5a07e8..a39dd66caa0 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -23,6 +23,7 @@ public: const Block & header, const SortDescription & description_, size_t max_merged_block_size_, + size_t max_block_bytes, UInt64 limit_, bool increase_sort_description_compile_attempts, size_t max_bytes_before_remerge_, @@ -46,6 +47,7 @@ private: size_t max_bytes_before_external_sort; TemporaryDataOnDiskPtr tmp_data; size_t 
min_free_disk_space; + size_t max_block_bytes; size_t sum_rows_in_blocks = 0; size_t sum_bytes_in_blocks = 0; diff --git a/src/Processors/Transforms/SortingTransform.h b/src/Processors/Transforms/SortingTransform.h index d9a30699f92..376b616352a 100644 --- a/src/Processors/Transforms/SortingTransform.h +++ b/src/Processors/Transforms/SortingTransform.h @@ -94,7 +94,7 @@ protected: void removeConstColumns(Chunk & chunk); void enrichChunkWithConstants(Chunk & chunk); - enum class Stage + enum class Stage : uint8_t { Consume = 0, Generate, diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 0d69b6e0a8d..ed67dd508f3 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -56,49 +56,39 @@ void SquashingChunksTransform::work() SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, true), squashing(min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void SimpleSquashingChunksTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - chunk.setColumns(block.getColumns(), block.rows()); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); - } + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } -IProcessor::Status SimpleSquashingChunksTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) - { - if (output.isFinished()) - return Status::Finished; + if (squashed_chunk.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - if (!output.canPush()) - return Status::PortFull; + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; +} - if (has_output) - { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; - } +bool SimpleSquashingChunksTransform::canGenerate() +{ + return !squashed_chunk.empty(); +} - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; - } - return ISimpleTransform::prepare(); +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..8c30a6032e4 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -29,7 +30,7 @@ private: }; /// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the 
following transform closes its input port. -class SimpleSquashingChunksTransform : public ISimpleTransform +class SimpleSquashingChunksTransform : public IInflatingTransform { public: explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); @@ -37,14 +38,14 @@ public: String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; private: SquashingTransform squashing; - - /// When consumption is finished we need to release the final chunk regardless of its size. - bool finished = false; + Chunk squashed_chunk; }; + } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 578d8cb8374..aa86879e62c 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -49,7 +49,7 @@ Block TotalsHavingTransform::transformHeader( if (expression) { - block = expression->updateHeader(std::move(block)); + block = expression->updateHeader(block); if (remove_filter) block.erase(filter_column_name); } diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index 350956c9c6b..ab605b52d93 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -11,7 +11,7 @@ using ExpressionActionsPtr = std::shared_ptr; class ActionsDAG; -enum class TotalsMode; +enum class TotalsMode : uint8_t; /** Takes blocks after grouping, with non-finalized aggregate functions. * Calculates total values according to totals_mode. 
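Two idioms recur throughout the hunks above and below: scoped enums gain an explicit uint8_t underlying type (which also lets headers forward-declare them, as in the TotalsMode line just above), and UNREACHABLE() calls after switches that already cover every enumerator and return in every case are dropped. A minimal standalone sketch of both, separate from the patch itself and using a hypothetical TotalsModeExample with an illustrative enumerator list rather than the real ClickHouse definition:

#include <cstdint>

// Opaque declaration: legal because the underlying type is fixed; every declaration of the
// enum must repeat the same underlying type. The type is complete from this point on.
enum class TotalsModeExample : uint8_t;

struct StepExample
{
    TotalsModeExample mode;     // usable as a data member without the full enumerator list
};

enum class TotalsModeExample : uint8_t      // the definition, typically in another header
{
    BEFORE_HAVING,
    AFTER_HAVING_EXCLUSIVE,
    AFTER_HAVING_INCLUSIVE,
    AFTER_HAVING_AUTO,
};

static_assert(sizeof(TotalsModeExample) == 1, "one byte instead of the default int-sized enum");

// Every enumerator is handled and every case returns, so no trailing UNREACHABLE()/return is
// needed. Clang accepts this without a -Wreturn-type warning when the switch is exhaustive,
// and -Wswitch keeps it exhaustive once new enumerators appear. (GCC may still warn here.)
const char * toString(TotalsModeExample mode)
{
    switch (mode)
    {
        case TotalsModeExample::BEFORE_HAVING: return "before_having";
        case TotalsModeExample::AFTER_HAVING_EXCLUSIVE: return "after_having_exclusive";
        case TotalsModeExample::AFTER_HAVING_INCLUSIVE: return "after_having_inclusive";
        case TotalsModeExample::AFTER_HAVING_AUTO: return "after_having_auto";
    }
}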
diff --git a/src/Processors/Transforms/WatermarkTransform.cpp b/src/Processors/Transforms/WatermarkTransform.cpp index bd2e2b8d363..751b99cd4ee 100644 --- a/src/Processors/Transforms/WatermarkTransform.cpp +++ b/src/Processors/Transforms/WatermarkTransform.cpp @@ -38,8 +38,7 @@ void WatermarkTransform::transform(Chunk & chunk) const ColumnUInt32::Container & window_end_data = static_cast(*window_column).getData(); for (const auto & ts : window_end_data) { - if (ts > max_watermark) - max_watermark = ts; + max_watermark = std::max(ts, max_watermark); if (lateness_upper_bound && ts <= lateness_upper_bound) late_signals.insert(ts); } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index f43b9a2e794..af340c4aab8 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1630,15 +1630,14 @@ struct StatefulWindowFunction : public WindowFunction void destroy(AggregateDataPtr __restrict place) const noexcept override { - auto * const state = static_cast(static_cast(place)); - state->~State(); + reinterpret_cast(place)->~State(); } bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v; } State & getState(const WindowFunctionWorkspace & workspace) const { - return *static_cast(static_cast(workspace.aggregate_function_state.data())); + return *reinterpret_cast(workspace.aggregate_function_state.data()); } }; @@ -2516,7 +2515,7 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction if (ts_scale_multiplier) { const auto & column = transform->blockAt(transform->current_row.block).input_columns[workspace.argument_column_indices[ARGUMENT_TIMESTAMP]]; - const auto & curr_timestamp = checkAndGetColumn(column.get())->getInt(transform->current_row.row); + const auto & curr_timestamp = checkAndGetColumn(*column).getInt(transform->current_row.row); Float64 time_elapsed = curr_timestamp - state.previous_timestamp; result = (time_elapsed > 0) ? 
(metric_diff * ts_scale_multiplier / time_elapsed * interval_duration) : 0; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index aa5a1c0cc1a..a1a886fb4f7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -361,7 +361,10 @@ std::optional generateViewChain( } InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); - out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, !materialized_view->hasInnerTable()); + + /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` + bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; + out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); if (interpreter.shouldAddSquashingFroStorage(inner_table)) { @@ -411,7 +414,8 @@ std::optional generateViewChain( out.getInputHeader(), view_id, nullptr, - std::move(runtime_stats)}); + std::move(runtime_stats), + insert_context}); if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { @@ -587,7 +591,7 @@ Chain buildPushingToViewsChain( static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { - const auto & context = views_data.context; + const auto & context = view.context; /// We create a table with the same name as original table and the same alias columns, /// but it will contain single block (that is INSERT-ed into main table). @@ -894,8 +898,6 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St { return std::current_exception(); } - - UNREACHABLE(); } void FinalizingViewsTransform::work() diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 53aceeda1cc..a1feed91b60 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -33,6 +33,9 @@ struct ViewRuntimeData /// Info which is needed for query views log. std::unique_ptr runtime_stats; + /// An overridden context bounded to this view with the correct SQL security grants. + ContextPtr context; + void setException(std::exception_ptr e) { exception = e; diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index ec102605677..19fdbd77cb2 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -74,7 +74,7 @@ public: InputPort * totals_in, InputPort * extremes_in, OutputPort * totals_out, OutputPort * extremes_out); - enum class StreamType + enum class StreamType : uint8_t { Main = 0, /// Stream for query data. There may be several streams of this type. Totals, /// Stream for totals. No more than one. diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 67a8fe5dcab..803d1686ad7 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -298,8 +298,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( /// If one of pipelines uses more threads then current limit, will keep it. 
/// It may happen if max_distributed_connections > max_threads - if (pipeline.max_threads > max_threads_limit) - max_threads_limit = pipeline.max_threads; + max_threads_limit = std::max(pipeline.max_threads, max_threads_limit); concurrency_control = pipeline.getConcurrencyControl(); } diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.h b/src/QueryPipeline/RemoteQueryExecutorReadContext.h index 4e62b42a067..b8aa8bb9111 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.h +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.h @@ -2,7 +2,6 @@ #if defined(OS_LINUX) -#include #include #include #include diff --git a/src/QueryPipeline/SizeLimits.h b/src/QueryPipeline/SizeLimits.h index fc052714b0c..1c84f81a127 100644 --- a/src/QueryPipeline/SizeLimits.h +++ b/src/QueryPipeline/SizeLimits.h @@ -7,7 +7,7 @@ namespace DB { /// What to do if the limit is exceeded. -enum class OverflowMode +enum class OverflowMode : uint8_t { THROW = 0, /// Throw exception. BREAK = 1, /// Abort query execution, return what is. diff --git a/src/QueryPipeline/StreamLocalLimits.h b/src/QueryPipeline/StreamLocalLimits.h index 5df026e6e3d..86d18a1bff6 100644 --- a/src/QueryPipeline/StreamLocalLimits.h +++ b/src/QueryPipeline/StreamLocalLimits.h @@ -13,7 +13,7 @@ namespace DB * It is checks max_{rows,bytes}_to_read in progress handler and use info from ProcessListElement::progress_in for this. * Currently this check is performed only in leaf streams. */ -enum class LimitsMode +enum class LimitsMode : uint8_t { LIMITS_CURRENT, LIMITS_TOTAL, diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 394bb4722f8..10b59751b22 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -596,7 +596,7 @@ namespace std::tie(new_pos, new_size) = callback(); if (!new_size) return false; - BufferBase::set(static_cast(const_cast(new_pos)), new_size, 0); + BufferBase::set(static_cast(const_cast(static_cast(new_pos))), new_size, 0); return true; } @@ -639,7 +639,7 @@ namespace /// Handles a connection after a responder is started (i.e. after getting a new call). - class Call + class Call // NOLINT(clang-analyzer-optin.performance.Padding) { public: Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, LoggerRawPtr log_); @@ -873,7 +873,7 @@ namespace query_context->getClientInfo().client_trace_context, query_context->getSettingsRef(), query_context->getOpenTelemetrySpanLog()); - thread_trace_context->root_span.kind = OpenTelemetry::SERVER; + thread_trace_context->root_span.kind = OpenTelemetry::SpanKind::SERVER; /// Prepare for sending exceptions and logs. 
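// Separate from the patch: a minimal sketch of the qualification change above. When the span
// kind becomes a scoped enum, its enumerators are no longer injected into the surrounding
// OpenTelemetry namespace and must be named through the enum type, hence SpanKind::SERVER.
// SpanKindExample and its enumerator list are illustrative, not the real definition.
#include <cstdint>

namespace OpenTelemetryExample
{
    enum class SpanKindExample : uint8_t { INTERNAL, SERVER, CLIENT };
}

// auto kind = OpenTelemetryExample::SERVER;                    // no longer compiles for a scoped enum
auto kind = OpenTelemetryExample::SpanKindExample::SERVER;      // enumerator qualified with the enum type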
const Settings & settings = query_context->getSettingsRef(); diff --git a/src/Server/HTTP/HTTPContext.h b/src/Server/HTTP/HTTPContext.h index 09c46ed188c..86054827632 100644 --- a/src/Server/HTTP/HTTPContext.h +++ b/src/Server/HTTP/HTTPContext.h @@ -12,7 +12,6 @@ struct IHTTPContext virtual uint64_t getMaxFields() const = 0; virtual uint64_t getMaxFieldNameSize() const = 0; virtual uint64_t getMaxFieldValueSize() const = 0; - virtual uint64_t getMaxChunkSize() const = 0; virtual Poco::Timespan getReceiveTimeout() const = 0; virtual Poco::Timespan getSendTimeout() const = 0; diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index 9db02eac220..3e82ec82550 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -20,6 +20,8 @@ #include #endif +static constexpr UInt64 HTTP_MAX_CHUNK_SIZE = 100ULL << 30; + namespace DB { HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session, const ProfileEvents::Event & read_event) @@ -54,7 +56,7 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse /// and retry with exactly the same (incomplete) set of rows. /// That's why we have to check body size if it's provided. if (getChunkedTransferEncoding()) - stream = std::make_unique(std::move(in), context->getMaxChunkSize()); + stream = std::make_unique(std::move(in), HTTP_MAX_CHUNK_SIZE); else if (hasContentLength()) { size_t content_length = getContentLength(); diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index af08467baeb..ac4f52e7766 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -132,12 +132,11 @@ protected: void nextImpl() override { if (chunked) - return nextImplChunked(); - - if (fixed_length) - return nextImplFixedLength(); - - WriteBufferFromPocoSocket::nextImpl(); + nextImplChunked(); + else if (fixed_length) + nextImplFixedLength(); + else + WriteBufferFromPocoSocket::nextImpl(); } void nextImplFixedLength() diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 4d29e4d6a87..d1db4cb3951 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -707,11 +707,11 @@ void HTTPHandler::processQuery( /// The data can also be compressed using incompatible internal algorithm. This is indicated by /// 'decompress' query parameter. std::unique_ptr in_post_maybe_compressed; - bool in_post_compressed = false; + bool is_in_post_compressed = false; if (params.getParsed("decompress", false)) { - in_post_maybe_compressed = std::make_unique(*in_post); - in_post_compressed = true; + in_post_maybe_compressed = std::make_unique(*in_post, /* allow_different_codecs_ = */ false, /* external_data_ = */ true); + is_in_post_compressed = true; } else in_post_maybe_compressed = std::move(in_post); @@ -845,7 +845,7 @@ void HTTPHandler::processQuery( /// If 'http_native_compression_disable_checksumming_on_decompress' setting is turned on, /// checksums of client data compressed with internal algorithm are not checked. 
- if (in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress) + if (is_in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress) static_cast(*in_post_maybe_compressed).disableChecksumming(); /// Add CORS header if 'add_http_cors_header' setting is turned on send * in Access-Control-Allow-Origin @@ -1108,7 +1108,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse client_trace_context, context->getSettingsRef(), context->getOpenTelemetrySpanLog()); - thread_trace_context->root_span.kind = OpenTelemetry::SERVER; + thread_trace_context->root_span.kind = OpenTelemetry::SpanKind::SERVER; thread_trace_context->root_span.addAttribute("clickhouse.uri", request.getURI()); response.setContentType("text/plain; charset=UTF-8"); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index ae4cf034276..a96402247a2 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -77,12 +77,12 @@ private: bool exception_is_written = false; std::function exception_writer; - inline bool hasDelayed() const + bool hasDelayed() const { return out_maybe_delayed_and_compressed != out_maybe_compressed.get(); } - inline void finalize() + void finalize() { if (finalized) return; @@ -94,7 +94,7 @@ private: out->finalize(); } - inline bool isFinalized() const + bool isFinalized() const { return finalized; } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index ac18c36e6c9..b4c32366463 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 15e64cf7f48..de1920bd535 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index ae2f150c4a1..e3a820340ad 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -384,7 +385,7 @@ void TCPHandler::runImpl() query_context->getClientInfo().client_trace_context, query_context->getSettingsRef(), query_context->getOpenTelemetrySpanLog()); - thread_trace_context->root_span.kind = OpenTelemetry::SERVER; + thread_trace_context->root_span.kind = OpenTelemetry::SpanKind::SERVER; query_scope.emplace(query_context, /* fatal_error_callback */ [this] { @@ -1107,7 +1108,7 @@ void TCPHandler::processTablesStatusRequest() ContextPtr context_to_resolve_table_names; if (is_interserver_mode) { - /// In interserver mode session context does not exists, because authentication is done for each query. + /// In the interserver mode session context does not exist, because authentication is done for each query. /// We also cannot create query context earlier, because it cannot be created before authentication, /// but query is not received yet. So we have to do this trick. 
ContextMutablePtr fake_interserver_context = Context::createCopy(server.context()); @@ -1872,7 +1873,7 @@ void TCPHandler::receiveQuery() if (state.part_uuids_to_ignore) query_context->getIgnoredPartUUIDs()->add(*state.part_uuids_to_ignore); - query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); }); + query_context->setProgressCallback([this] (const Progress & value) { this->updateProgress(value); }); query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); }); /// diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 281fc72dfc4..4879d1a16dc 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1100,11 +1100,11 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage return result; } -bool AlterCommands::hasInvertedIndex(const StorageInMemoryMetadata & metadata) +bool AlterCommands::hasFullTextIndex(const StorageInMemoryMetadata & metadata) { for (const auto & index : metadata.secondary_indices) { - if (index.type == INVERTED_INDEX_NAME) + if (index.type == FULL_TEXT_INDEX_NAME) return true; } return false; @@ -1224,7 +1224,7 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) if (has_column) { - auto column_from_table = columns.get(command.column_name); + const auto & column_from_table = columns.get(command.column_name); if (command.data_type && !command.default_expression && column_from_table.default_desc.expression) { command.default_kind = column_from_table.default_desc.kind; @@ -1288,7 +1288,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const /// Looks like there is something around default expression for this column (method `getDefault` is not implemented for the data type Object). /// But after ALTER TABLE ADD COLUMN we need to fill existing rows with something (exactly the default value). /// So we don't allow to do it for now. - if (command.data_type->hasDynamicSubcolumns()) + if (command.data_type->hasDynamicSubcolumnsDeprecated()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. 
It has known bugs"); if (virtuals->tryGet(column_name, VirtualsKind::Persistent)) @@ -1366,8 +1366,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const GetColumnsOptions options(GetColumnsOptions::All); const auto old_data_type = all_columns.getColumn(options, column_name).type; - bool new_type_has_object = command.data_type->hasDynamicSubcolumns(); - bool old_type_has_object = old_data_type->hasDynamicSubcolumns(); + bool new_type_has_object = command.data_type->hasDynamicSubcolumnsDeprecated(); + bool old_type_has_object = old_data_type->hasDynamicSubcolumnsDeprecated(); if (new_type_has_object || old_type_has_object) throw Exception( @@ -1566,7 +1566,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } /// if we change data type for column with default else if (all_columns.has(column_name) && command.data_type) { - auto column_in_table = all_columns.get(column_name); + const auto & column_in_table = all_columns.get(column_name); /// Column doesn't have a default, nothing to check if (!column_in_table.default_desc.expression) continue; diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index b1b6c8308f9..46abffab8ad 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -54,7 +54,7 @@ struct AlterCommand }; /// Which property user wants to remove from column - enum class RemoveProperty + enum class RemoveProperty : uint8_t { NO_PROPERTY, /// Default specifiers @@ -234,8 +234,8 @@ public: /// additional mutation command (MATERIALIZE_TTL) will be returned. MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters=false) const; - /// Check if commands have any inverted index - static bool hasInvertedIndex(const StorageInMemoryMetadata & metadata); + /// Check if commands have any full-text index + static bool hasFullTextIndex(const StorageInMemoryMetadata & metadata); }; } diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index c0277d0cbbb..cffb1dc9ca3 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -74,7 +74,6 @@ std::unique_ptr RemoteReadBuffer::create( bool is_random_accessed) { - auto remote_path = remote_file_metadata->remote_path; auto remote_read_buffer = std::make_unique(buff_size); std::tie(remote_read_buffer->local_file_holder, read_buffer) @@ -205,7 +204,7 @@ void ExternalDataSourceCache::recoverTask() } } for (auto & path : invalid_paths) - fs::remove_all(path); + (void)fs::remove_all(path); initialized = true; auto root_dirs_to_string = [&]() diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index a5dea2f63db..4c8c7974005 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -70,7 +70,7 @@ public: void initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_); - inline bool isInitialized() const { return initialized; } + bool isInitialized() const { return initialized; } std::pair, std::unique_ptr> createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer, bool is_random_accessed); diff --git a/src/Storages/Cache/RemoteCacheController.cpp b/src/Storages/Cache/RemoteCacheController.cpp index 403d0c8e43b..092cabc7239 100644 --- 
a/src/Storages/Cache/RemoteCacheController.cpp +++ b/src/Storages/Cache/RemoteCacheController.cpp @@ -206,7 +206,7 @@ void RemoteCacheController::close() // delete directory LOG_TRACE(log, "Removing the local cache. local path: {}", local_path.string()); if (fs::exists(local_path)) - fs::remove_all(local_path); + (void)fs::remove_all(local_path); } std::unique_ptr RemoteCacheController::allocFile() diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index 782a6b89519..22b3d64b1db 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -45,41 +45,41 @@ public: */ void waitMoreData(size_t start_offset_, size_t end_offset_); - inline size_t size() const { return current_offset; } + size_t size() const { return current_offset; } - inline const std::filesystem::path & getLocalPath() { return local_path; } - inline String getRemotePath() const { return file_metadata_ptr->remote_path; } + const std::filesystem::path & getLocalPath() { return local_path; } + String getRemotePath() const { return file_metadata_ptr->remote_path; } - inline UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } + UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } bool isModified(IRemoteFileMetadataPtr file_metadata_); - inline void markInvalid() + void markInvalid() { std::lock_guard lock(mutex); valid = false; } - inline bool isValid() + bool isValid() { std::lock_guard lock(mutex); return valid; } - inline bool isEnable() + bool isEnable() { std::lock_guard lock(mutex); return is_enable; } - inline void disable() + void disable() { std::lock_guard lock(mutex); is_enable = false; } - inline void enable() + void enable() { std::lock_guard lock(mutex); is_enable = true; } IRemoteFileMetadataPtr getFileMetadata() { return file_metadata_ptr; } - inline size_t getFileSize() const { return file_metadata_ptr->file_size; } + size_t getFileSize() const { return file_metadata_ptr->file_size; } void startBackgroundDownload(std::unique_ptr in_readbuffer_, BackgroundSchedulePool & thread_pool); diff --git a/src/Storages/ColumnDefault.cpp b/src/Storages/ColumnDefault.cpp index dcb59f7bd65..a5f8e8df425 100644 --- a/src/Storages/ColumnDefault.cpp +++ b/src/Storages/ColumnDefault.cpp @@ -56,6 +56,30 @@ std::string toString(const ColumnDefaultKind kind) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ColumnDefaultKind"); } +ColumnDefault & ColumnDefault::operator=(const ColumnDefault & other) +{ + if (this == &other) + return *this; + + kind = other.kind; + expression = other.expression ? other.expression->clone() : nullptr; + ephemeral_default = other.ephemeral_default; + + return *this; +} + +ColumnDefault & ColumnDefault::operator=(ColumnDefault && other) noexcept +{ + if (this == &other) + return *this; + + kind = std::exchange(other.kind, ColumnDefaultKind{}); + expression = other.expression ? 
other.expression->clone() : nullptr; + other.expression.reset(); + ephemeral_default = std::exchange(other.ephemeral_default, false); + + return *this; +} bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs) { diff --git a/src/Storages/ColumnDefault.h b/src/Storages/ColumnDefault.h index af1be6f3bec..0ec486e022f 100644 --- a/src/Storages/ColumnDefault.h +++ b/src/Storages/ColumnDefault.h @@ -9,7 +9,7 @@ namespace DB { -enum class ColumnDefaultKind +enum class ColumnDefaultKind : uint8_t { Default, Materialized, @@ -24,15 +24,19 @@ std::string toString(ColumnDefaultKind kind); struct ColumnDefault { + ColumnDefault() = default; + ColumnDefault(const ColumnDefault & other) { *this = other; } + ColumnDefault & operator=(const ColumnDefault & other); + ColumnDefault(ColumnDefault && other) noexcept { *this = std::move(other); } + ColumnDefault & operator=(ColumnDefault && other) noexcept; + ColumnDefaultKind kind = ColumnDefaultKind::Default; ASTPtr expression; bool ephemeral_default = false; }; - bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs); - using ColumnDefaults = std::unordered_map; } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 16b89f24243..a8869970300 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -60,6 +60,46 @@ ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, ASTPtr cod { } +ColumnDescription & ColumnDescription::operator=(const ColumnDescription & other) +{ + if (this == &other) + return *this; + + name = other.name; + type = other.type; + default_desc = other.default_desc; + comment = other.comment; + codec = other.codec ? other.codec->clone() : nullptr; + settings = other.settings; + ttl = other.ttl ? other.ttl->clone() : nullptr; + stat = other.stat; + + return *this; +} + +ColumnDescription & ColumnDescription::operator=(ColumnDescription && other) noexcept +{ + if (this == &other) + return *this; + + name = std::move(other.name); + type = std::move(other.type); + default_desc = std::move(other.default_desc); + comment = std::move(other.comment); + + codec = other.codec ? other.codec->clone() : nullptr; + other.codec.reset(); + + settings = std::move(other.settings); + + ttl = other.ttl ? other.ttl->clone() : nullptr; + other.ttl.reset(); + + stat = std::move(other.stat); + + return *this; +} + bool ColumnDescription::operator==(const ColumnDescription & other) const { auto ast_to_str = [](const ASTPtr & ast) { return ast ? queryToString(ast) : String{}; }; @@ -547,7 +587,19 @@ bool ColumnsDescription::hasNested(const String & column_name) const bool ColumnsDescription::hasSubcolumn(const String & column_name) const { - return subcolumns.get<0>().count(column_name); + if (subcolumns.get<0>().count(column_name)) + return true; + + /// Check for dynamic subcolumns + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + auto it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return true; + } + + return false; } const ColumnDescription & ColumnsDescription::get(const String & column_name) const @@ -644,6 +696,15 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns return *jt; } + /// Check for dynamic subcolumns. 
+ auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + } + return {}; } @@ -730,9 +791,19 @@ bool ColumnsDescription::hasAlias(const String & column_name) const bool ColumnsDescription::hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const { auto it = columns.get<1>().find(column_name); - return (it != columns.get<1>().end() - && (defaultKindToGetKind(it->default_desc.kind) & kind)) - || hasSubcolumn(column_name); + if ((it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & kind)) || hasSubcolumn(column_name)) + return true; + + /// Check for dynamic subcolumns. + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->hasSubcolumn(dynamic_subcolumn_name)) + return true; + } + + return false; } bool ColumnsDescription::hasColumnOrNested(GetColumnsOptions::Kind kind, const String & column_name) const diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 82e55e29073..79e43d0a4e4 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -92,8 +92,11 @@ struct ColumnDescription std::optional stat; ColumnDescription() = default; - ColumnDescription(ColumnDescription &&) = default; - ColumnDescription(const ColumnDescription &) = default; + ColumnDescription(const ColumnDescription & other) { *this = other; } + ColumnDescription & operator=(const ColumnDescription & other); + ColumnDescription(ColumnDescription && other) noexcept { *this = std::move(other); } + ColumnDescription & operator=(ColumnDescription && other) noexcept; + ColumnDescription(String name_, DataTypePtr type_); ColumnDescription(String name_, DataTypePtr type_, String comment_); ColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, String comment_); diff --git a/src/Storages/CompressionCodecSelector.h b/src/Storages/CompressionCodecSelector.h index ad6e943e821..e03d06bacdb 100644 --- a/src/Storages/CompressionCodecSelector.h +++ b/src/Storages/CompressionCodecSelector.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include #include #include diff --git a/src/Storages/DataDestinationType.h b/src/Storages/DataDestinationType.h index 4729019b5cb..a22f2bdb706 100644 --- a/src/Storages/DataDestinationType.h +++ b/src/Storages/DataDestinationType.h @@ -4,7 +4,7 @@ namespace DB { -enum class DataDestinationType +enum class DataDestinationType : uint8_t { DISK, VOLUME, diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h deleted file mode 100644 index df7276b90b4..00000000000 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -template -struct DeltaLakeMetadataParser -{ -public: - DeltaLakeMetadataParser(); - - Strings getFiles(const Configuration & configuration, ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; -}; - -} diff --git 
a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp deleted file mode 100644 index 699dfe8fda0..00000000000 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include -#include -#include -#include "config.h" -#include -#include - -#if USE_AWS_S3 -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -template -struct HudiMetadataParser::Impl -{ - /** - * Useful links: - * - https://hudi.apache.org/tech-specs/ - * - https://hudi.apache.org/docs/file_layouts/ - */ - - /** - * Hudi tables store metadata files and data files. - * Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg, - * metadata is not required in order to understand which files we need to read, moreover, - * for Hudi metadata does not always exist. - * - * There can be two types of data files - * 1. base files (columnar file formats like Apache Parquet/Orc) - * 2. log files - * Currently we support reading only `base files`. - * Data file name format: - * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension] - * - * To find needed parts we need to find out latest part file for every file group for every partition. - * Explanation why: - * Hudi reads in and overwrites the entire table/partition with each update. - * Hudi controls the number of file groups under a single partition according to the - * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. - * Each file group is identified by File Id. - */ - Strings processMetadataFiles(const Configuration & configuration) - { - auto log = getLogger("HudiMetadataParser"); - - const auto keys = MetadataReadHelper::listFiles(configuration, "", Poco::toLower(configuration.format)); - - using Partition = std::string; - using FileID = std::string; - struct FileInfo - { - String key; - UInt64 timestamp = 0; - }; - std::unordered_map> data_files; - - for (const auto & key : keys) - { - auto key_file = std::filesystem::path(key); - Strings file_parts; - const String stem = key_file.stem(); - splitInto<'_'>(file_parts, stem); - if (file_parts.size() != 3) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); - - const auto partition = key_file.parent_path().stem(); - const auto & file_id = file_parts[0]; - const auto timestamp = parse(file_parts[2]); - - auto & file_info = data_files[partition][file_id]; - if (file_info.timestamp == 0 || file_info.timestamp < timestamp) - { - file_info.key = std::move(key); - file_info.timestamp = timestamp; - } - } - - Strings result; - for (auto & [partition, partition_data] : data_files) - { - LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); - for (auto & [file_id, file_data] : partition_data) - result.push_back(std::move(file_data.key)); - } - return result; - } -}; - - -template -HudiMetadataParser::HudiMetadataParser() : impl(std::make_unique()) -{ -} - -template -Strings HudiMetadataParser::getFiles(const Configuration & configuration, ContextPtr) -{ - return impl->processMetadataFiles(configuration); -} - -template HudiMetadataParser::HudiMetadataParser(); -template Strings HudiMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); - -} - -#endif diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h deleted file mode 100644 index 
6727ba2f718..00000000000 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -template -struct HudiMetadataParser -{ -public: - HudiMetadataParser(); - - Strings getFiles(const Configuration & configuration, ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; -}; - -} diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h deleted file mode 100644 index 711abbde38c..00000000000 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ /dev/null @@ -1,136 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -template -class IStorageDataLake : public Storage -{ -public: - static constexpr auto name = Name::name; - using Configuration = typename Storage::Configuration; - - template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_, {}, mode), context_, std::forward(args)...) - , base_configuration(configuration_) - , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) - { - return std::make_shared>(configuration_, context_, mode, std::forward(args)...); - } - - String getName() const override { return name; } - - static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, - const std::optional & format_settings, - const ContextPtr & local_context) - { - auto configuration = getConfigurationForDataRead(base_configuration, local_context); - return Storage::getTableStructureFromData(configuration, format_settings, local_context); - } - - static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context) - { - return Storage::getConfiguration(engine_args, local_context, /* get_format_from_file */false); - } - - Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return Storage::getConfiguration(); - } - - void updateConfiguration(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - } - -private: - static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, const ContextPtr & local_context, const Strings & keys = {}, - LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE) - { - auto configuration{base_configuration}; - configuration.update(local_context); - configuration.static_configuration = true; - - try - { - if (keys.empty()) - configuration.keys = getDataFiles(configuration, local_context); - else - configuration.keys = keys; - - LOG_TRACE( - getLogger("DataLake"), - "New configuration path: {}, keys: {}", - configuration.getPath(), fmt::join(configuration.keys, ", ")); - - configuration.connect(local_context); - return configuration; - } - catch (...) 
- { - if (mode <= LoadingStrictnessLevel::CREATE) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - return configuration; - } - } - - static Strings getDataFiles(const Configuration & configuration, const ContextPtr & local_context) - { - return MetadataParser().getFiles(configuration, local_context); - } - - void updateConfigurationImpl(const ContextPtr & local_context) - { - const bool updated = base_configuration.update(local_context); - auto new_keys = getDataFiles(base_configuration, local_context); - - if (!updated && new_keys == Storage::getConfiguration().keys) - return; - - Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys)); - } - - Configuration base_configuration; - std::mutex configuration_update_mutex; - LoggerPtr log; -}; - - -template -static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) -{ - auto configuration = DataLake::getConfiguration(args.engine_args, args.getLocalContext()); - - /// Data lakes use parquet format, no need for schema inference. - if (configuration.format == "auto") - configuration.format = "Parquet"; - - return DataLake::create(configuration, args.getContext(), args.mode, args.table_id, args.columns, args.constraints, - args.comment, getFormatSettings(args.getContext())); -} - -} - -#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp deleted file mode 100644 index 19cd97c3d4f..00000000000 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include - -#if USE_AWS_S3 && USE_AVRO - -namespace DB -{ - -StoragePtr StorageIceberg::create( - const DB::StorageIceberg::Configuration & base_configuration, - DB::ContextPtr context_, - LoadingStrictnessLevel mode, - const DB::StorageID & table_id_, - const DB::ColumnsDescription & columns_, - const DB::ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) -{ - auto configuration{base_configuration}; - configuration.update(context_); - std::unique_ptr metadata; - NamesAndTypesList schema_from_metadata; - try - { - metadata = parseIcebergMetadata(configuration, context_); - schema_from_metadata = metadata->getTableSchema(); - configuration.keys = metadata->getDataFiles(); - } - catch (...) - { - if (mode <= LoadingStrictnessLevel::CREATE) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - return std::make_shared( - std::move(metadata), - configuration, - context_, - table_id_, - columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, - constraints_, - comment, - format_settings_); -} - -StorageIceberg::StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) - : StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_) - , current_metadata(std::move(metadata_)) - , base_configuration(configuration_) -{ -} - -ColumnsDescription StorageIceberg::getTableStructureFromData( - Configuration & base_configuration, - const std::optional &, - const ContextPtr & local_context) -{ - auto configuration{base_configuration}; - configuration.update(local_context); - auto metadata = parseIcebergMetadata(configuration, local_context); - return ColumnsDescription(metadata->getTableSchema()); -} - -void StorageIceberg::updateConfigurationImpl(const ContextPtr & local_context) -{ - const bool updated = base_configuration.update(local_context); - auto new_metadata = parseIcebergMetadata(base_configuration, local_context); - - if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) - current_metadata = std::move(new_metadata); - else if (!updated) - return; - - auto updated_configuration{base_configuration}; - /// If metadata wasn't changed, we won't list data files again. - updated_configuration.keys = current_metadata->getDataFiles(); - StorageS3::useConfiguration(updated_configuration); -} - -} - -#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h deleted file mode 100644 index 45cbef0b41b..00000000000 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO - -# include -# include -# include -# include -# include -# include -# include - - -namespace DB -{ - -/// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) -/// Right now it's implemented on top of StorageS3 and right now it doesn't support -/// many Iceberg features like schema evolution, partitioning, positional and equality deletes. -/// TODO: Implement Iceberg as a separate storage using IObjectStorage -/// (to support all object storages, not only S3) and add support for missing Iceberg features. 
-class StorageIceberg : public StorageS3 -{ -public: - static constexpr auto name = "Iceberg"; - - using Configuration = StorageS3::Configuration; - - static StoragePtr create(const Configuration & base_configuration, - ContextPtr context_, - LoadingStrictnessLevel mode, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); - - StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); - - String getName() const override { return name; } - - static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, - const std::optional &, - const ContextPtr & local_context); - - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) - { - return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false); - } - - Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return StorageS3::getConfiguration(); - } - - void updateConfiguration(const ContextPtr & local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - } - -private: - void updateConfigurationImpl(const ContextPtr & local_context); - - std::unique_ptr current_metadata; - Configuration base_configuration; - std::mutex configuration_update_mutex; -}; - -} - -#endif diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp deleted file mode 100644 index d66e21550a3..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int S3_ERROR; -} - -std::shared_ptr -S3DataLakeMetadataReadHelper::createReadBuffer(const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration) -{ - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries; - return std::make_shared( - base_configuration.client, - base_configuration.url.bucket, - key, - base_configuration.url.version_id, - request_settings, - context->getReadSettings()); -} - -bool S3DataLakeMetadataReadHelper::exists(const String & key, const StorageS3::Configuration & configuration) -{ - return S3::objectExists(*configuration.client, configuration.url.bucket, key); -} - -std::vector S3DataLakeMetadataReadHelper::listFiles( - const StorageS3::Configuration & base_configuration, const String & prefix, const String & suffix) -{ - const auto & table_path = base_configuration.url.key; - const auto & bucket = base_configuration.url.bucket; - const auto & client = base_configuration.client; - - std::vector res; - S3::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; - - request.SetBucket(bucket); - request.SetPrefix(std::filesystem::path(table_path) / prefix); - - bool is_finished{false}; - while (!is_finished) - { - outcome = client->ListObjectsV2(request); - if (!outcome.IsSuccess()) - throw S3Exception( - 
outcome.GetError().GetErrorType(), - "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", - quoteString(bucket), - quoteString(base_configuration.url.key), - backQuote(outcome.GetError().GetExceptionName()), - quoteString(outcome.GetError().GetMessage())); - - const auto & result_batch = outcome.GetResult().GetContents(); - for (const auto & obj : result_batch) - { - const auto & filename = obj.GetKey(); - if (filename.ends_with(suffix)) - res.push_back(filename); - } - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - is_finished = !outcome.GetResult().GetIsTruncated(); - } - - LOG_TRACE(getLogger("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - - return res; -} - -} -#endif diff --git a/src/Storages/DataLakes/S3MetadataReader.h b/src/Storages/DataLakes/S3MetadataReader.h deleted file mode 100644 index c29a66b3813..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - -#if USE_AWS_S3 - -#include - -namespace DB -{ - -class ReadBuffer; - -struct S3DataLakeMetadataReadHelper -{ - static std::shared_ptr createReadBuffer( - const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration); - - static bool exists(const String & key, const StorageS3::Configuration & configuration); - - static std::vector listFiles(const StorageS3::Configuration & configuration, const std::string & prefix = "", const std::string & suffix = ""); -}; -} - -#endif diff --git a/src/Storages/DataLakes/StorageDeltaLake.h b/src/Storages/DataLakes/StorageDeltaLake.h deleted file mode 100644 index 8b4ba28d6f7..00000000000 --- a/src/Storages/DataLakes/StorageDeltaLake.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -#if USE_AWS_S3 -#include -#include -#endif - -namespace DB -{ - -struct StorageDeltaLakeName -{ - static constexpr auto name = "DeltaLake"; -}; - -#if USE_AWS_S3 && USE_PARQUET -using StorageDeltaLakeS3 = IStorageDataLake>; -#endif - -} diff --git a/src/Storages/DataLakes/StorageHudi.h b/src/Storages/DataLakes/StorageHudi.h deleted file mode 100644 index 84666f51405..00000000000 --- a/src/Storages/DataLakes/StorageHudi.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -#if USE_AWS_S3 -#include -#include -#endif - -namespace DB -{ - -struct StorageHudiName -{ - static constexpr auto name = "Hudi"; -}; - -#if USE_AWS_S3 -using StorageHudiS3 = IStorageDataLake>; -#endif - -} diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp deleted file mode 100644 index 118600f7212..00000000000 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include - - -namespace DB -{ - -#define REGISTER_DATA_LAKE_STORAGE(STORAGE, NAME) \ - factory.registerStorage( \ - NAME, \ - [](const StorageFactory::Arguments & args) \ - { \ - return createDataLakeStorage(args);\ - }, \ - { \ - .supports_settings = false, \ - .supports_schema_inference = true, \ - .source_access_type = AccessType::S3, \ - }); - -#if USE_PARQUET -void registerStorageDeltaLake(StorageFactory & factory) -{ - REGISTER_DATA_LAKE_STORAGE(StorageDeltaLakeS3, StorageDeltaLakeName::name) -} -#endif - -#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. 
- -void registerStorageIceberg(StorageFactory & factory) -{ - REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) -} - -#endif - -void registerStorageHudi(StorageFactory & factory) -{ - REGISTER_DATA_LAKE_STORAGE(StorageHudiS3, StorageHudiName::name) -} - -} - -#endif diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 2d052255ac5..06d4c185840 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -173,7 +173,7 @@ bool DistributedAsyncInsertBatch::valid() { if (!fs::exists(file)) { - LOG_WARNING(parent.log, "File {} does not exists, likely due abnormal shutdown", file); + LOG_WARNING(parent.log, "File {} does not exist, likely due to abnormal shutdown", file); res = false; } } diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 1ee77611191..d471c67553d 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -554,7 +554,7 @@ void DistributedAsyncInsertDirectoryQueue::processFilesWithBatching(const Settin { if (!fs::exists(file_path)) { - LOG_WARNING(log, "File {} does not exists, likely due to current_batch.txt processing", file_path); + LOG_WARNING(log, "File {} does not exist, likely due to current_batch.txt processing", file_path); continue; } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index b89a8d7bcfd..e556bda2561 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -173,7 +173,10 @@ void DistributedSink::writeAsync(const Block & block) else { if (storage.getShardingKeyExpr() && (cluster->getShardsInfo().size() > 1)) - return writeSplitAsync(block); + { + writeSplitAsync(block); + return; + } writeAsyncImpl(block); ++inserted_blocks; @@ -436,6 +439,10 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si void DistributedSink::writeSync(const Block & block) { + std::lock_guard lock(execution_mutex); + if (isCancelled()) + return; + OpenTelemetry::SpanHolder span(__PRETTY_FUNCTION__); const Settings & settings = context->getSettingsRef(); @@ -537,6 +544,10 @@ void DistributedSink::onFinish() LOG_DEBUG(log, "It took {} sec. to insert {} blocks, {} rows per second. {}", elapsed, inserted_blocks, inserted_rows / elapsed, getCurrentStateDescription()); }; + std::lock_guard lock(execution_mutex); + if (isCancelled()) + return; + /// Pool finished means that some exception had been thrown before, /// and scheduling new jobs will return "Cannot schedule a task" error.
if (insert_sync && pool && !pool->finished()) @@ -587,6 +598,7 @@ void DistributedSink::onFinish() void DistributedSink::onCancel() { + std::lock_guard lock(execution_mutex); if (pool && !pool->finished()) { try diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 7a9e89c9e94..a4c95633595 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -113,6 +113,8 @@ private: std::optional pool; ThrottlerPtr throttler; + std::mutex execution_mutex; + struct JobReplica { JobReplica() = default; diff --git a/src/Storages/FileLog/FileLogConsumer.h b/src/Storages/FileLog/FileLogConsumer.h index e44bfeb1806..42473abba42 100644 --- a/src/Storages/FileLog/FileLogConsumer.h +++ b/src/Storages/FileLog/FileLogConsumer.h @@ -33,7 +33,7 @@ public: const String & getCurrentRecord() const { return current[-1].data; } private: - enum class BufferStatus + enum class BufferStatus : uint8_t { INIT, NO_RECORD_RETURNED, diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index d3e31101d3b..abd4b4ce23b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -416,7 +416,7 @@ void StorageFileLog::drop() { try { - std::filesystem::remove_all(metadata_base_path); + (void)std::filesystem::remove_all(metadata_base_path); } catch (...) { @@ -1009,7 +1009,7 @@ bool StorageFileLog::updateFileInfos() file_infos.meta_by_inode.erase(meta); if (std::filesystem::exists(getFullMetaPath(file_name))) - std::filesystem::remove(getFullMetaPath(file_name)); + (void)std::filesystem::remove(getFullMetaPath(file_name)); file_infos.context_by_name.erase(it); } else diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 0434213c558..6c8dbb98cb8 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -63,7 +63,7 @@ public: const auto & getFormatName() const { return format_name; } - enum class FileStatus + enum class FileStatus : uint8_t { OPEN, /// First time open file after table start up. 
NO_CHANGE, diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index 7a02a3039db..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1203 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_DETECT_FORMAT; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. - */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. 
- throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. - std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - if (format_name != "auto") - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); - else - 
columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; - - /// We don't allow special columns in HDFS storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - std::optional format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(std::move(format_)) - , compression_method(compression_method_) - { - } - - Data next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns, format}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - { - if (format) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because all files are empty. " - "You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify table structure manually"); - } - return {nullptr, std::nullopt, format}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns, format}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_index > 0 && current_index <= paths_with_info.size()); - auto path_with_info = paths_with_info[current_index - 1]; - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, 
static_cast(zstd_window_log_max)); - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - auto context = getContext(); - - if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(context); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - if (format) - { - auto cache_key = getKeyForSchemaCache(url, *format, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - std::optional format; - const String & compression_method; - size_t current_index = 0; - }; -} - -std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." - " You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
- " You can specify the format manually"); - } - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - if (format) - return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); -} - -std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); -} - -ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -HDFSSource::~HDFSSource() = default; - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? 
std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - const ContextPtr & context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const String & uri, size_t sequence_number) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - if (context->getSettingsRef().hdfs_truncate_on_insert || hdfsExists(fs.get(), path_from_uri.c_str())) - return std::nullopt; - - if (context->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uri.find_first_of('.', uri.find_last_of('/')); - String new_uri; - do - { - new_uri = uri.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : uri.substr(pos)); - ++sequence_number; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - - return new_uri; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } -} - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(context, path, 1)) - path = *new_path; - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromHDFS( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo 
info; - const bool need_only_count; - std::shared_ptr storage; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - context_, - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.front(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - if (auto new_uri = checkAndGetNewFileOnInsertIfNeeded(context_, uris.front(), uris.size())) - { - uris.push_back(*new_uri); - current_uri = *new_uri; - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - 
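// An illustrative, self-contained sketch (not part of this diff) of the candidate-name loop in
// checkAndGetNewFileOnInsertIfNeeded() above: with hdfs_create_new_file_on_insert enabled, an INSERT into an
// already existing file probes "<name>.1.<ext>", "<name>.2.<ext>", ... until an unused URI is found.
#include <string>

std::string nextCandidateUri(const std::string & uri, size_t sequence_number)
{
    /// Insert the sequence number before the file extension, or append it if the file name has no extension,
    /// mirroring the substring logic of the loop above.
    auto pos = uri.find_first_of('.', uri.find_last_of('/'));
    return uri.substr(0, pos) + "." + std::to_string(sequence_number)
        + (pos == std::string::npos ? "" : uri.substr(pos));
}

/// nextCandidateUri("hdfs://ns/data/file.parquet", 1) == "hdfs://ns/data/file.1.parquet"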
-#endif diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h deleted file mode 100644 index b8faa27d678..00000000000 --- a/src/Storages/HDFS/StorageHDFS.h +++ /dev/null @@ -1,188 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class IInputFormat; - -/** - * This class represents table engine for external hdfs files. - * Read method is supported for now. - */ -class StorageHDFS final : public IStorage, WithContext -{ -public: - struct PathInfo - { - time_t last_mod_time; - size_t size; - }; - - struct PathWithInfo - { - PathWithInfo() = default; - PathWithInfo(const String & path_, const std::optional & info_) : path(path_), info(info_) {} - String path; - std::optional info; - }; - - StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_ = "", - bool distributed_processing_ = false, - ASTPtr partition_by = nullptr); - - String getName() const override { return "HDFS"; } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override; - - void truncate( - const ASTPtr & query, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr local_context, - TableExclusiveLockHolder &) override; - - bool supportsPartitionBy() const override { return true; } - - /// Check if the format is column-oriented. - /// Is is useful because column oriented formats could effectively skip unknown columns - /// So we can create a header of only required columns in read method and ask - /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
- bool supportsSubsetOfColumns(const ContextPtr & context_) const; - - bool supportsSubcolumns() const override { return true; } - - static ColumnsDescription getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx); - - static std::pair getTableStructureAndFormatFromData( - const String & uri, - const String & compression_method, - const ContextPtr & ctx); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -protected: - friend class HDFSSource; - friend class ReadFromHDFS; - -private: - static std::pair getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx); - - std::vector uris; - String format_name; - String compression_method; - const bool distributed_processing; - ASTPtr partition_by; - bool is_path_with_globs; - - LoggerPtr log = getLogger("StorageHDFS"); -}; - -class PullingPipelineExecutor; - -class HDFSSource : public ISource, WithContext -{ -public: - class DisclosedGlobIterator - { - public: - DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class URISIterator - { - public: - URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - using IteratorWrapper = std::function; - using StorageHDFSPtr = std::shared_ptr; - - HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_); - - ~HDFSSource() override; - - String getName() const override; - - Chunk generate() override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info); - - StorageHDFSPtr storage; - Block block_for_format; - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - UInt64 max_block_size; - std::shared_ptr file_iterator; - ColumnsDescription columns_description; - bool need_only_count; - size_t total_rows_in_file = 0; - - std::unique_ptr read_buf; - std::shared_ptr input_format; - std::unique_ptr pipeline; - std::unique_ptr reader; - String current_path; - std::optional current_file_size; - - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
- bool initialize(); -}; -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp deleted file mode 100644 index bde8b84e349..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "config.h" -#include "Interpreters/Context_fwd.h" - -#if USE_HDFS - -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageHDFSCluster::StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & compression_method) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")")) - , uri(uri_) - , format_name(format_name_) -{ - checkHDFSURL(uri_); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_); - else - columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_).second; - - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query)); - - TableFunctionHDFSCluster::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context); -} - - -RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared(uri, predicate, getVirtualsList(), context); - auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); - return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h deleted file mode 100644 index 0b5c6242aa9..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageHDFSCluster : public IStorageCluster -{ -public: - StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String 
& uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & compression_method); - - std::string getName() const override { return "HDFSCluster"; } - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -private: - void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; - - String uri; - String format_name; -}; - - -} - -#endif diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index 0f9d3364ffd..81c167165d3 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index 629c8689263..9098e20946b 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -163,7 +163,7 @@ void HiveORCFile::prepareReader() in = std::make_unique(namenode_url, path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); auto format_settings = getFormatSettings(getContext()); std::atomic is_stopped{0}; - auto result = arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in, format_settings, is_stopped, "ORC", ORC_MAGIC_BYTES), arrow::default_memory_pool()); + auto result = arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in, format_settings, is_stopped, "ORC", ORC_MAGIC_BYTES), ArrowMemoryPool::instance()); THROW_ARROW_NOT_OK(result.status()); reader = std::move(result).ValueOrDie(); } @@ -282,7 +282,7 @@ void HiveParquetFile::prepareReader() in = std::make_unique(namenode_url, path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); auto format_settings = getFormatSettings(getContext()); std::atomic is_stopped{0}; - THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES), arrow::default_memory_pool(), &reader)); + THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES), ArrowMemoryPool::instance(), &reader)); } void HiveParquetFile::loadSplitMinMaxIndexesImpl() diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 601c7f2a310..a9468ce7d3d 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace orc { @@ -35,7 +35,7 @@ public: using MinMaxIndex = IMergeTreeDataPart::MinMaxIndex; using MinMaxIndexPtr = std::shared_ptr; - enum class FileFormat + enum class FileFormat : uint8_t { RC_FILE, TEXT, @@ -65,8 +65,8 @@ public: {ORC_INPUT_FORMAT, FileFormat::ORC}, }; - static inline bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } - static inline FileFormat toFileFormat(const String & format_class) + static bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } + static FileFormat toFileFormat(const String & format_class) { if (isFormatClass(format_class)) { diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 
88ab8e15e76..28d8128e052 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -38,8 +38,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -921,9 +921,7 @@ void ReadFromHive::initializePipeline(QueryPipelineBuilder & pipeline, const Bui } sources_info->hive_files = std::move(*hive_files); - - if (num_streams > sources_info->hive_files.size()) - num_streams = sources_info->hive_files.size(); + num_streams = std::min(num_streams, sources_info->hive_files.size()); Pipes pipes; for (size_t i = 0; i < num_streams; ++i) diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 67ef153af0e..8a457dd6e01 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include @@ -68,12 +68,12 @@ private: using FileInfo = HiveMetastoreClient::FileInfo; using HiveTableMetadataPtr = HiveMetastoreClient::HiveTableMetadataPtr; - enum class PruneLevel + enum class PruneLevel : uint8_t { - None, /// Do not prune - Partition, - File, - Split, + None = 0, /// Do not prune + Partition = 1, + File = 2, + Split = 3, Max = Split, }; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index b532abc9074..9afafe9f52b 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include @@ -27,11 +27,14 @@ namespace ErrorCodes extern const int CANNOT_RESTORE_TABLE; } -IStorage::IStorage(StorageID storage_id_) +IStorage::IStorage(StorageID storage_id_, std::unique_ptr metadata_) : storage_id(std::move(storage_id_)) - , metadata(std::make_unique()) , virtuals(std::make_unique()) { + if (metadata_) + metadata.set(std::move(metadata_)); + else + metadata.set(std::make_unique()); } bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const @@ -233,7 +236,7 @@ StorageID IStorage::getStorageID() const return storage_id; } -ConditionEstimator IStorage::getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const +ConditionEstimator IStorage::getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const { return {}; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 87a04c3fcc6..9d6b3457a24 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -20,7 +20,6 @@ #include #include -#include namespace DB @@ -99,7 +98,7 @@ class IStorage : public std::enable_shared_from_this, public TypePromo public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_); + explicit IStorage(StorageID storage_id_, std::unique_ptr metadata_ = nullptr); IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; @@ -136,7 +135,7 @@ public: /// Returns true if the storage supports queries with the PREWHERE section. virtual bool supportsPrewhere() const { return false; } - virtual ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const; + virtual ConditionEstimator getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const; /// Returns which columns supports PREWHERE, or empty std::nullopt if all columns is supported. 
/// This is needed for engines whose aggregates data from multiple tables, like Merge. @@ -172,8 +171,10 @@ public: /// This method can return true for readonly engines that return the same rows for reading (such as SystemNumbers) virtual bool supportsTransactions() const { return false; } + /// Returns true if the storage supports storing of data type Object. + virtual bool supportsDynamicSubcolumnsDeprecated() const { return false; } + /// Returns true if the storage supports storing of dynamic subcolumns. - /// For now it makes sense only for data type Object. virtual bool supportsDynamicSubcolumns() const { return false; } /// Requires squashing small blocks to large for optimal storage. @@ -259,6 +260,9 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage has any projection. + virtual bool hasProjection() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index ab45ce877c2..9c5b29ae265 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -86,7 +86,8 @@ private: void ReadFromCluster::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index f160d1c0855..a3bc97779b3 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -82,17 +82,17 @@ public: auto pollTimeout() const { return poll_timeout; } - inline bool hasMorePolledMessages() const + bool hasMorePolledMessages() const { return (stalled_status == NOT_STALLED) && (current != messages.end()); } - inline bool polledDataUnusable() const + bool polledDataUnusable() const { return (stalled_status != NOT_STALLED) && (stalled_status != NO_MESSAGES_RETURNED); } - inline bool isStalled() const { return stalled_status != NOT_STALLED; } + bool isStalled() const { return stalled_status != NOT_STALLED; } void storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); diff --git a/src/Storages/Kafka/KafkaProducer.cpp b/src/Storages/Kafka/KafkaProducer.cpp index 77676fb010b..8b7f03c52ed 100644 --- a/src/Storages/Kafka/KafkaProducer.cpp +++ b/src/Storages/Kafka/KafkaProducer.cpp @@ -60,7 +60,7 @@ void KafkaProducer::produce(const String & message, size_t rows_in_message, cons { const auto & timestamp_column = assert_cast(*columns[timestamp_column_index.value()]); const auto timestamp = std::chrono::seconds{timestamp_column.getElement(last_row)}; - builder.timestamp(timestamp); + (void)builder.timestamp(timestamp); } while (!shutdown_called) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 03a30d47d91..f5c5d093ce1 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MaterializedView/RefreshSet.cpp 
b/src/Storages/MaterializedView/RefreshSet.cpp index 0125b958a41..a3ef327dc24 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -116,7 +116,7 @@ RefreshSet::InfoContainer RefreshSet::getInfo() const lock.unlock(); InfoContainer res; - for (auto [id, task] : tasks_copy) + for (const auto & [id, task] : tasks_copy) res.push_back(task->getInfo()); return res; } diff --git a/src/Storages/MergeTree/ActiveDataPartSet.h b/src/Storages/MergeTree/ActiveDataPartSet.h index 3c644c89b8c..ca744b3ed2a 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/src/Storages/MergeTree/ActiveDataPartSet.h @@ -22,7 +22,7 @@ using Strings = std::vector; class ActiveDataPartSet { public: - enum class AddPartOutcome + enum class AddPartOutcome : uint8_t { Added, HasCovering, diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h index 5092fbdd864..5da2a714b02 100644 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h +++ b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h @@ -29,7 +29,7 @@ struct ApproximateNearestNeighborInformation using Embedding = std::vector; Embedding reference_vector; - enum class Metric + enum class Metric : uint8_t { Unknown, L2, @@ -40,7 +40,7 @@ struct ApproximateNearestNeighborInformation String column_name; UInt64 limit; - enum class Type + enum class Type : uint8_t { OrderBy, Where diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp index 56a4378cf9a..0a69bf1109f 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp @@ -93,7 +93,6 @@ String BackgroundJobsAssignee::toString(Type type) case Type::Moving: return "Moving"; } - UNREACHABLE(); } void BackgroundJobsAssignee::start() diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.h b/src/Storages/MergeTree/BackgroundJobsAssignee.h index 9369ebe9135..80ddead3e56 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.h +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.h @@ -55,7 +55,7 @@ public: /// e.g. merges, mutations and fetches. The same will be for Plain MergeTree except there is no /// replication queue, so we will just scan parts and decide what to do. /// Moving operations are the same for all types of MergeTree and also have their own timetable. 
- enum class Type + enum class Type : uint8_t { DataProcessing, Moving diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h index c9a4887cca3..bf29aaf32d0 100644 --- a/src/Storages/MergeTree/BackgroundProcessList.h +++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -87,7 +87,7 @@ public: virtual void onEntryCreate(const Entry & /* entry */) {} virtual void onEntryDestroy(const Entry & /* entry */) {} - virtual inline ~BackgroundProcessList() = default; + virtual ~BackgroundProcessList() = default; }; } diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/Backup.cpp similarity index 75% rename from src/Storages/MergeTree/localBackup.cpp rename to src/Storages/MergeTree/Backup.cpp index 0698848fa70..8ba37ffc042 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/Backup.cpp @@ -1,9 +1,11 @@ -#include "localBackup.h" +#include "Backup.h" #include +#include #include #include + namespace DB { @@ -16,8 +18,9 @@ namespace ErrorCodes namespace { -void localBackupImpl( - const DiskPtr & disk, +void BackupImpl( + const DiskPtr & src_disk, + const DiskPtr & dst_disk, IDiskTransaction * transaction, const String & source_path, const String & destination_path, @@ -38,41 +41,42 @@ void localBackupImpl( if (transaction) transaction->createDirectories(destination_path); else - disk->createDirectories(destination_path); + dst_disk->createDirectories(destination_path); - for (auto it = disk->iterateDirectory(source_path); it->isValid(); it->next()) + for (auto it = src_disk->iterateDirectory(source_path); it->isValid(); it->next()) { auto source = it->path(); auto destination = fs::path(destination_path) / it->name(); - if (!disk->isDirectory(source)) + if (!src_disk->isDirectory(source)) { if (make_source_readonly) { if (transaction) transaction->setReadOnly(source); else - disk->setReadOnly(source); + src_disk->setReadOnly(source); } if (copy_instead_of_hardlinks || files_to_copy_instead_of_hardlinks.contains(it->name())) { if (transaction) transaction->copyFile(source, destination, read_settings, write_settings); else - disk->copyFile(source, *disk, destination, read_settings, write_settings); + src_disk->copyFile(source, *dst_disk, destination, read_settings, write_settings); } else { if (transaction) transaction->createHardLink(source, destination); else - disk->createHardLink(source, destination); + src_disk->createHardLink(source, destination); } } else { - localBackupImpl( - disk, + BackupImpl( + src_disk, + dst_disk, transaction, source, destination, @@ -123,8 +127,11 @@ private: }; } -void localBackup( - const DiskPtr & disk, +/// src_disk and dst_disk can be the same disk when local backup. +/// copy_instead_of_hardlinks must be true when remote backup. 
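/// Illustrative call shapes, inferred from the call sites in this diff (a sketch, not an exhaustive contract):
///   local freeze:  Backup(disk, disk, source_path, destination_path, ...) - the same disk twice, hardlinks allowed;
///   remote freeze: Backup(src_disk, dst_disk, source_path, destination_path, ...,
///                         /* copy_instead_of_hardlinks= */ true, /* files_to_copy_intead_of_hardlinks= */ {}, ...)
///                  - different disks, every file is copied instead of hardlinked.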
+void Backup( + const DiskPtr & src_disk, + const DiskPtr & dst_disk, const String & source_path, const String & destination_path, const ReadSettings & read_settings, @@ -135,10 +142,10 @@ void localBackup( const NameSet & files_to_copy_intead_of_hardlinks, DiskTransactionPtr disk_transaction) { - if (disk->exists(destination_path) && !disk->isDirectoryEmpty(destination_path)) + if (dst_disk->exists(destination_path) && !dst_disk->isDirectoryEmpty(destination_path)) { throw DB::Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists and is not empty.", - DB::fullPath(disk, destination_path)); + DB::fullPath(dst_disk, destination_path)); } size_t try_no = 0; @@ -154,8 +161,9 @@ void localBackup( { if (disk_transaction) { - localBackupImpl( - disk, + BackupImpl( + src_disk, + dst_disk, disk_transaction.get(), source_path, destination_path, @@ -165,27 +173,29 @@ void localBackup( /* level= */ 0, max_level, copy_instead_of_hardlinks, - files_to_copy_intead_of_hardlinks); + files_to_copy_intead_of_hardlinks + ); } else if (copy_instead_of_hardlinks) { - CleanupOnFail cleanup([disk, destination_path]() { disk->removeRecursive(destination_path); }); - disk->copyDirectoryContent(source_path, disk, destination_path, read_settings, write_settings, /*cancellation_hook=*/{}); + CleanupOnFail cleanup([dst_disk, destination_path]() { dst_disk->removeRecursive(destination_path); }); + src_disk->copyDirectoryContent(source_path, dst_disk, destination_path, read_settings, write_settings, /*cancellation_hook=*/{}); cleanup.success(); } else { std::function cleaner; - if (disk->supportZeroCopyReplication()) + if (dst_disk->supportZeroCopyReplication()) /// Note: this code will create garbage on s3. We should always remove `copy_instead_of_hardlinks` files. /// The third argument should be a list of exceptions, but (looks like) it is ignored for keep_all_shared_data = true. - cleaner = [disk, destination_path]() { disk->removeSharedRecursive(destination_path, /*keep_all_shared_data*/ true, {}); }; + cleaner = [dst_disk, destination_path]() { dst_disk->removeSharedRecursive(destination_path, /*keep_all_shared_data*/ true, {}); }; else - cleaner = [disk, destination_path]() { disk->removeRecursive(destination_path); }; + cleaner = [dst_disk, destination_path]() { dst_disk->removeRecursive(destination_path); }; CleanupOnFail cleanup(std::move(cleaner)); - localBackupImpl( - disk, + BackupImpl( + src_disk, + dst_disk, disk_transaction.get(), source_path, destination_path, diff --git a/src/Storages/MergeTree/localBackup.h b/src/Storages/MergeTree/Backup.h similarity index 94% rename from src/Storages/MergeTree/localBackup.h rename to src/Storages/MergeTree/Backup.h index 3490db9726e..3421640ace5 100644 --- a/src/Storages/MergeTree/localBackup.h +++ b/src/Storages/MergeTree/Backup.h @@ -24,8 +24,9 @@ struct WriteSettings; * * If `transaction` is provided, the changes will be added to it instead of performend on disk. */ - void localBackup( - const DiskPtr & disk, + void Backup( + const DiskPtr & src_disk, + const DiskPtr & dst_disk, const String & source_path, const String & destination_path, const ReadSettings & read_settings, diff --git a/src/Storages/MergeTree/BoolMask.cpp b/src/Storages/MergeTree/BoolMask.cpp index 8ae75394498..a502e385a32 100644 --- a/src/Storages/MergeTree/BoolMask.cpp +++ b/src/Storages/MergeTree/BoolMask.cpp @@ -1,5 +1,5 @@ #include "BoolMask.h" - +/// BoolMask::can_be_X = true implies it will never change during BoolMask::combine. 
const BoolMask BoolMask::consider_only_can_be_true(false, true); const BoolMask BoolMask::consider_only_can_be_false(true, false); diff --git a/src/Storages/MergeTree/BoolMask.h b/src/Storages/MergeTree/BoolMask.h index 11f9238aa28..05b55a5f245 100644 --- a/src/Storages/MergeTree/BoolMask.h +++ b/src/Storages/MergeTree/BoolMask.h @@ -1,5 +1,7 @@ #pragma once +#include + /// Multiple Boolean values. That is, two Boolean values: can it be true, can it be false. struct BoolMask { @@ -7,31 +9,46 @@ struct BoolMask bool can_be_false = false; BoolMask() = default; - BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) {} + BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) { } - BoolMask operator &(const BoolMask & m) const - { - return {can_be_true && m.can_be_true, can_be_false || m.can_be_false}; - } - BoolMask operator |(const BoolMask & m) const - { - return {can_be_true || m.can_be_true, can_be_false && m.can_be_false}; - } - BoolMask operator !() const - { - return {can_be_false, can_be_true}; - } + BoolMask operator&(const BoolMask & m) const { return {can_be_true && m.can_be_true, can_be_false || m.can_be_false}; } + BoolMask operator|(const BoolMask & m) const { return {can_be_true || m.can_be_true, can_be_false && m.can_be_false}; } + BoolMask operator!() const { return {can_be_false, can_be_true}; } - /// If mask is (true, true), then it can no longer change under operation |. - /// We use this condition to early-exit KeyConditions::check{InRange,After} methods. + bool operator==(const BoolMask & other) const { return can_be_true == other.can_be_true && can_be_false == other.can_be_false; } + + /// Check if mask is no longer changeable under BoolMask::combine. + /// We use this condition to early-exit KeyConditions::checkInRange methods. bool isComplete() const { - return can_be_false && can_be_true; + return can_be_true && can_be_false; } - /// These special constants are used to implement KeyCondition::mayBeTrue{InRange,After} via KeyCondition::check{InRange,After}. - /// When used as an initial_mask argument in KeyCondition::check{InRange,After} methods, they effectively prevent - /// calculation of discarded BoolMask component as it is already set to true. + /// Combine check result in different hyperrectangles. + static BoolMask combine(const BoolMask & left, const BoolMask & right) + { + return {left.can_be_true || right.can_be_true, left.can_be_false || right.can_be_false}; + } + + /// The following two special constants are used to speed up + /// KeyCondition::checkInRange. When used as an initial_mask argument, they + /// effectively prevent calculation of discarded BoolMask component as it is + /// no longer changeable under BoolMask::combine (isComplete). 
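/// An illustrative worked example (not from the source): starting from consider_only_can_be_true = (false, true),
/// combining with any subrange whose result has can_be_true set yields (true, true), which isComplete(), so the
/// range check can exit early and the discarded can_be_false component is never actually computed:
///   BoolMask::combine(BoolMask(false, true), BoolMask(true, false)) == BoolMask(true, true)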
static const BoolMask consider_only_can_be_true; static const BoolMask consider_only_can_be_false; }; + +namespace fmt +{ +template <> +struct formatter +{ + static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } + + template + auto format(const BoolMask & mask, FormatContext & ctx) + { + return fmt::format_to(ctx.out(), "({}, {})", mask.can_be_true, mask.can_be_false); + } +}; +} diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 052e3ba4b74..5faa8d4b48b 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -459,7 +459,8 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( else disk->createDirectories(to); - localBackup( + Backup( + disk, disk, getRelativePath(), fs::path(to) / dir_path, @@ -496,6 +497,62 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached && !params.external_transaction); } +MutableDataPartStoragePtr DataPartStorageOnDiskBase::freezeRemote( + const std::string & to, + const std::string & dir_path, + const DiskPtr & dst_disk, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + std::function save_metadata_callback, + const ClonePartParams & params) const +{ + auto src_disk = volume->getDisk(); + if (params.external_transaction) + params.external_transaction->createDirectories(to); + else + dst_disk->createDirectories(to); + + /// freezeRemote() uses copy instead of hardlinks for all files. + /// In this case, files_to_copy_intead_of_hardlinks is left empty. + Backup( + src_disk, + dst_disk, + getRelativePath(), + fs::path(to) / dir_path, + read_settings, + write_settings, + params.make_source_readonly, + /* max_level= */ {}, + true, + /* files_to_copy_intead_of_hardlinks= */ {}, + params.external_transaction); + + /// The save_metadata_callback function acts on the target disk. + if (save_metadata_callback) + save_metadata_callback(dst_disk); + + if (params.external_transaction) + { + params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); + params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); + if (!params.keep_metadata_version) + params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); + } + else + { + dst_disk->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); + dst_disk->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); + if (!params.keep_metadata_version) + dst_disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); + } + + auto single_disk_volume = std::make_shared(dst_disk->getName(), dst_disk, 0); + + /// Do not initialize storage in case of DETACH because part may be broken.
+ bool to_detached = dir_path.starts_with(std::string_view((fs::path(MergeTreeData::DETACHED_DIR_NAME) / "").string())); + return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached && !params.external_transaction); +} + MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const std::string & to, const std::string & dir_path, @@ -580,7 +637,7 @@ void DataPartStorageOnDiskBase::rename( disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); disk.moveDirectory(from, to); - /// Only after moveDirectory() since before the directory does not exists. + /// Only after moveDirectory() since before the directory does not exist. SyncGuardPtr to_sync_guard; if (fsync_part_dir) to_sync_guard = volume->getDisk()->getDirectorySyncGuard(to); diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 75bf3d6f93c..81353d4e20b 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -70,6 +70,15 @@ public: std::function save_metadata_callback, const ClonePartParams & params) const override; + MutableDataPartStoragePtr freezeRemote( + const std::string & to, + const std::string & dir_path, + const DiskPtr & dst_disk, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + std::function save_metadata_callback, + const ClonePartParams & params) const override; + MutableDataPartStoragePtr clonePart( const std::string & to, const std::string & dir_path, diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp index 94f636423cc..bfe656111b0 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp @@ -95,11 +95,20 @@ UInt32 DataPartStorageOnDiskFull::getRefCount(const String & file_name) const return volume->getDisk()->getRefCount(fs::path(root_path) / part_dir / file_name); } -std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name) const +std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name, bool if_exists) const { - auto objects = volume->getDisk()->getStorageObjects(fs::path(root_path) / part_dir / file_name); + const std::string path = fs::path(root_path) / part_dir / file_name; + auto objects = volume->getDisk()->getStorageObjects(path); + + if (objects.empty() && if_exists) + return ""; + if (objects.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "One file must be mapped to one object on blob storage in MergeTree tables"); + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "One file must be mapped to one object on blob storage by path {} in MergeTree tables, have {}.", + path, objects.size()); + } return objects[0].remote_path; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.h b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h index 15c6d42c721..ba787809b63 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskFull.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h @@ -23,7 +23,7 @@ public: Poco::Timestamp getFileLastModified(const String & file_name) const override; size_t getFileSize(const std::string & file_name) const override; UInt32 getRefCount(const std::string & file_name) const override; - std::string getRemotePath(const std::string & file_name) const override; + std::string getRemotePath(const std::string & file_name, bool if_exists) const override; String getUniqueId() 
const override; std::unique_ptr readFile( diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index f05e8288719..6e0273701ad 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -1,3 +1,5 @@ +// NOLINTBEGIN(clang-analyzer-optin.core.EnumCastOutOfRange) + #include #include #include @@ -240,7 +242,7 @@ UInt32 GinIndexStore::getNumOfSegments() readBinary(version, *istr); if (version > static_cast>(CURRENT_GIN_FILE_FORMAT_VERSION)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported inverted index version {}", version); + throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported full-text index version {}", version); readVarUInt(result, *istr); } @@ -504,3 +506,5 @@ void GinIndexStoreFactory::remove(const String & part_path) } } + +// NOLINTEND(clang-analyzer-optin.core.EnumCastOutOfRange) diff --git a/src/Storages/MergeTree/GinIndexStore.h b/src/Storages/MergeTree/GinIndexStore.h index ad14a142318..94c880211df 100644 --- a/src/Storages/MergeTree/GinIndexStore.h +++ b/src/Storages/MergeTree/GinIndexStore.h @@ -13,8 +13,8 @@ #include #include -/// GinIndexStore manages the generalized inverted index ("gin") for a data part, and it is made up of one or more immutable -/// index segments. +/// GinIndexStore manages the generalized inverted index ("gin") (full-text index )for a data part, and it is made up of one or more +/// immutable index segments. /// /// There are 4 types of index files in a store: /// 1. Segment ID file(.gin_sid): it contains one byte for version followed by the next available segment ID. diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index d06d9791a53..f6320a7e1e4 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -126,7 +126,7 @@ public: virtual UInt32 getRefCount(const std::string & file_name) const = 0; /// Get path on remote filesystem from file name on local filesystem. - virtual std::string getRemotePath(const std::string & file_name) const = 0; + virtual std::string getRemotePath(const std::string & file_name, bool if_exists) const = 0; virtual UInt64 calculateTotalSizeOnDisk() const = 0; @@ -258,6 +258,15 @@ public: std::function save_metadata_callback, const ClonePartParams & params) const = 0; + virtual std::shared_ptr freezeRemote( + const std::string & to, + const std::string & dir_path, + const DiskPtr & dst_disk, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + std::function save_metadata_callback, + const ClonePartParams & params) const = 0; + /// Make a full copy of a data part into 'to/dir_path' (possibly to a different disk). virtual std::shared_ptr clonePart( const std::string & to, diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 8124a4e516d..c276361559c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -346,16 +346,25 @@ IMergeTreeDataPart::Index IMergeTreeDataPart::getIndex() const if (!index_loaded) loadIndex(); index_loaded = true; - return TSA_SUPPRESS_WARNING_FOR_READ(index); /// The variable is guaranteed to be unchanged after return. 
+ return index; } -void IMergeTreeDataPart::setIndex(Index index_) +void IMergeTreeDataPart::setIndex(const Columns & cols_) { std::scoped_lock lock(index_mutex); if (!index->empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once"); - index = index_; + index = std::make_shared(cols_); + index_loaded = true; +} + +void IMergeTreeDataPart::setIndex(Columns && cols_) +{ + std::scoped_lock lock(index_mutex); + if (!index->empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once"); + index = std::make_shared(std::move(cols_)); index_loaded = true; } @@ -784,7 +793,8 @@ void IMergeTreeDataPart::addProjectionPart( projection_parts[projection_name] = std::move(projection_part); } -void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded) +void IMergeTreeDataPart::loadProjections( + bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded, bool only_metadata) { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); for (const auto & projection : metadata_snapshot->projections) @@ -804,7 +814,10 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch try { - part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); + if (only_metadata) + part->loadChecksums(require_columns_checksums); + else + part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); } catch (...) { @@ -913,7 +926,7 @@ void IMergeTreeDataPart::loadIndex() const if (!index_file->eof()) throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path); - index->assign(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end())); + index = std::make_shared(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end())); } } @@ -1260,6 +1273,33 @@ void IMergeTreeDataPart::appendFilesOfChecksums(Strings & files) files.push_back("checksums.txt"); } +void IMergeTreeDataPart::loadRowsCountFileForUnexpectedPart() +{ + auto read_rows_count = [&]() + { + auto buf = metadata_manager->read("count.txt"); + readIntText(rows_count, *buf); + assertEOF(*buf); + }; + if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part) + { + if (metadata_manager->exists("count.txt")) + { + read_rows_count(); + return; + } + } + else + { + if (getDataPartStorage().exists("count.txt")) + { + read_rows_count(); + return; + } + } + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No count.txt in part {}", name); +} + void IMergeTreeDataPart::loadRowsCount() { auto read_rows_count = [&]() @@ -1938,7 +1978,6 @@ void IMergeTreeDataPart::remove() std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const { assert(!broken || detached); - String res; /** If you need to detach a part, and directory into which we want to rename it already exists, * we will rename to the directory with the name to which the suffix is added in the form of "_tryN". 
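// A small sketch (not part of this diff) of the "count.txt" convention that loadRowsCountFileForUnexpectedPart()
// above relies on: the file holds a single decimal row count and nothing else, hence readIntText() + assertEOF().
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>

size_t parseCountFile(const std::string & contents)
{
    DB::ReadBufferFromString buf(contents);
    size_t rows = 0;
    DB::readIntText(rows, buf);
    DB::assertEOF(buf); /// any trailing bytes after the number indicate a corrupted metadata file
    return rows;
}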
@@ -2395,6 +2434,38 @@ void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const exception_code = code; } +ColumnPtr IMergeTreeDataPart::getColumnSample(const NameAndTypePair & column) const +{ + const size_t total_mark = getMarksCount(); + /// If column doesn't have dynamic subcolumns or part has no data, just create column using its type. + if (!column.type->hasDynamicSubcolumns() || !total_mark) + return column.type->createColumn(); + + /// Otherwise, read sample column with 0 rows from the part, so it will load dynamic structure. + NamesAndTypesList cols; + cols.emplace_back(column); + + StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); + StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + + MergeTreeReaderPtr reader = getReader( + cols, + storage_snapshot_ptr, + MarkRanges{MarkRange(0, 1)}, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, + storage.getContext()->getMarkCache().get(), + std::make_shared(), + MergeTreeReaderSettings{}, + ValueSizeMap{}, + ReadBufferFromFileBase::ProfileCallback{}); + + Columns result; + result.resize(1); + reader->readRows(0, 1, false, 0, result); + return result[0]; +} + bool isCompactPart(const MergeTreeDataPartPtr & data_part) { return (data_part && data_part->getType() == MergeTreeDataPartType::Compact); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index ba2ff2ed6fe..bd3814bf415 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -43,7 +43,6 @@ class IReservation; using ReservationPtr = std::unique_ptr; class IMergeTreeReader; -class IMergeTreeDataPartWriter; class MarkCache; class UncompressedCache; class MergeTreeTransaction; @@ -51,7 +50,7 @@ class MergeTreeTransaction; struct MergeTreeReadTaskInfo; using MergeTreeReadTaskInfoPtr = std::shared_ptr; -enum class DataPartRemovalState +enum class DataPartRemovalState : uint8_t { NOT_ATTEMPTED, VISIBLE_TO_TRANSACTIONS, @@ -74,12 +73,11 @@ public: using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; - using MergeTreeWriterPtr = std::unique_ptr; using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; - using Index = std::shared_ptr; + using Index = std::shared_ptr; using IndexSizeByName = std::unordered_map; using Type = MergeTreeDataPartType; @@ -106,15 +104,6 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; - virtual MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) = 0; - virtual bool isStoredOnDisk() const = 0; virtual bool isStoredOnRemoteDisk() const = 0; @@ -166,8 +155,14 @@ public: NameAndTypePair getColumn(const String & name) const; std::optional tryGetColumn(const String & column_name) const; + /// Get sample column from part. For ordinary columns it just creates column using its type. + /// For columns with dynamic structure it reads sample column with 0 rows from the part. 
+ ColumnPtr getColumnSample(const NameAndTypePair & column) const; + const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } + const SerializationByName & getSerializations() const { return serializations; } + SerializationPtr getSerialization(const String & column_name) const; SerializationPtr tryGetSerialization(const String & column_name) const; @@ -183,6 +178,8 @@ public: void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency); void appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection = false) const; + void loadRowsCountFileForUnexpectedPart(); + String getMarksFileExtension() const { return index_granularity_info.mark_type.getFileExtension(); } /// Generate the new name for this part according to `new_part_info` and min/max dates from the old name. @@ -195,6 +192,7 @@ public: /// take place, you must take original name of column for this part from /// storage and pass it to this method. std::optional getColumnPosition(const String & column_name) const; + const NameToNumber & getColumnPositions() const { return column_name_to_position; } /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). /// If no checksums are present returns the name of the first physically existing column. @@ -246,7 +244,7 @@ public: /// The common procedure is to ask the keeper with unlock request to release a references to the blobs. /// And then follow the keeper answer decide remove or preserve the blobs in that part from s3. /// However in some special cases Clickhouse can make a decision without asking keeper. - enum class BlobsRemovalPolicyForTemporaryParts + enum class BlobsRemovalPolicyForTemporaryParts : uint8_t { /// decision about removing blobs is determined by keeper, the common case ASK_KEEPER, @@ -368,7 +366,8 @@ public: int32_t metadata_version; Index getIndex() const; - void setIndex(Index index_); + void setIndex(const Columns & cols_); + void setIndex(Columns && cols_); void unloadIndex(); /// For data in RAM ('index') @@ -439,10 +438,20 @@ public: bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } + bool hasProjection() const { return !projection_parts.empty(); } + bool hasBrokenProjection(const String & projection_name) const; /// Return true, if all projections were loaded successfully and none was marked as broken. - void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false); + void loadProjections( + bool require_columns_checksums, + bool check_consistency, + bool & has_broken_projection, + bool if_not_loaded = false, + bool only_metadata = false); + + /// If checksums.txt exists, reads file's checksums (and sizes) from it + void loadChecksums(bool require); void setBrokenReason(const String & message, int code) const; @@ -453,23 +462,23 @@ public: /// File with compression codec name which was used to compress part columns /// by default. Some columns may have their own compression codecs, but /// default will be stored in this file. - static inline constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; + static constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; /// "delete-on-destroy.txt" is deprecated. It is no longer being created, only is removed. 
- static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; + static constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; - static inline constexpr auto UUID_FILE_NAME = "uuid.txt"; + static constexpr auto UUID_FILE_NAME = "uuid.txt"; /// File that contains information about kinds of serialization of columns /// and information that helps to choose kind of serialization later during merging /// (number of rows, number of rows with default values, etc). - static inline constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; + static constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; /// Version used for transactions. - static inline constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; + static constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; - static inline constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; + static constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; /// One of part files which is used to check how many references (I'd like /// to say hardlinks, but it will confuse even more) we have for the part @@ -481,7 +490,7 @@ public: /// it was mutation without any change for source part. In this case we /// really don't need to remove data from remote FS and need only decrement /// reference counter locally. - static inline constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; + static constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; /// Checks that all TTLs (table min/max, column ttls, so on) for part /// calculated. Part without calculated TTL may exist if TTL was added after @@ -668,9 +677,6 @@ private: static void appendFilesOfColumns(Strings & files); - /// If checksums.txt exists, reads file's checksums (and sizes) from it - void loadChecksums(bool require); - static void appendFilesOfChecksums(Strings & files); /// Loads marks index granularity into memory diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 2488c63e309..891ba1b9660 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -3,6 +3,13 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + + Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation) { Block result; @@ -38,18 +45,27 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per } IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : data_part(data_part_) - , storage(data_part_->storage) + : data_part_name(data_part_name_) + , serializations(serializations_) + , index_granularity_info(index_granularity_info_) + , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) + , virtual_columns(virtual_columns_) , columns_list(columns_list_) , 
settings(settings_) - , index_granularity(index_granularity_) , with_final_mark(settings.can_use_adaptive_granularity) + , data_part_storage(data_part_storage_) + , index_granularity(index_granularity_) { } @@ -60,6 +76,102 @@ Columns IMergeTreeDataPartWriter::releaseIndexColumns() std::make_move_iterator(index_columns.end())); } +SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const +{ + auto it = serializations.find(column_name); + if (it == serializations.end()) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column or subcolumn {} in part {}", column_name, data_part_name); + + return it->second; +} + +ASTPtr IMergeTreeDataPartWriter::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec ? column_desc.codec : default_codec->getFullCodecDesc(); + }; + + const auto & columns = metadata_snapshot->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec->getFullCodecDesc(); +} + + IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, + const std::vector & indices_to_recalc, + const 
Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) +{ + if (part_type == MergeTreeDataPartType::Compact) + return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, column_positions, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else if (part_type == MergeTreeDataPartType::Wide) + return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString()); +} + } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 3f359904ddd..f04beb37ebb 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -1,12 +1,13 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -22,9 +23,14 @@ class IMergeTreeDataPartWriter : private boost::noncopyable { public: IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_ = {}); @@ -32,7 +38,7 @@ public: virtual void write(const Block & block, const IColumn::Permutation * permutation) = 0; - virtual void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) = 0; + virtual void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) = 0; virtual void finish(bool sync) = 0; @@ -40,16 +46,48 @@ public: const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } protected: + SerializationPtr getSerialization(const String & column_name) const; - const MergeTreeMutableDataPartPtr data_part; - const MergeTreeData & storage; + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + + IDataPartStorage & getDataPartStorage() { return *data_part_storage; } + + const String data_part_name; + /// Serializations for every columns and subcolumns by their names. 
+ const SerializationByName serializations; + const MergeTreeIndexGranularityInfo index_granularity_info; + const MergeTreeSettingsPtr storage_settings; const StorageMetadataPtr metadata_snapshot; + const VirtualsDescriptionPtr virtual_columns; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; - MergeTreeIndexGranularity index_granularity; const bool with_final_mark; + MutableDataPartStoragePtr data_part_storage; MutableColumns index_columns; + MergeTreeIndexGranularity index_granularity; }; +using MergeTreeDataPartWriterPtr = std::unique_ptr; +using ColumnPositions = std::unordered_map; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, + const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns_, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + } diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index cf6b64aac85..4ad7f6ef991 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -264,7 +264,8 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const /// Move columns from block. name_and_type = requested_columns.begin(); for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) - res_columns[pos] = std::move(copy_block.getByName(name_and_type->name).column); + if (copy_block.has(name_and_type->name)) + res_columns[pos] = std::move(copy_block.getByName(name_and_type->name).column); } catch (Exception & e) { diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index a5b84eba241..a1ec0339fd6 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -100,7 +100,7 @@ protected: /// Position and level (of nesting). using ColumnNameLevel = std::optional>; - /// In case of part of the nested column does not exists, offsets should be + /// In case of part of the nested column does not exist, offsets should be /// read, but only the offsets for the current column, that is why it /// returns pair of size_t, not just one. 
ColumnNameLevel findColumnForOffsets(const NameAndTypePair & column) const; diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index c8d6aa0ba65..89c813ab233 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -7,20 +7,21 @@ namespace DB { IMergedBlockOutputStream::IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_) - : storage(data_part->storage) + : storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) - , data_part_storage(data_part->getDataPartStoragePtr()) + , data_part_storage(data_part_storage_) , reset_columns(reset_columns_) { if (reset_columns) { SerializationInfo::Settings info_settings = { - .ratio_of_defaults_for_sparse = storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization, .choose_kind = false, }; @@ -42,7 +43,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( return {}; for (const auto & column : empty_columns) - LOG_TRACE(storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); + LOG_TRACE(data_part->storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. std::map stream_counts; @@ -91,7 +92,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( } else /// If we have no file in checksums it doesn't exist on disk { - LOG_TRACE(storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); + LOG_TRACE(data_part->storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); itr = remove_files.erase(itr); } } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index ca4e3899b29..a9b058418ea 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,10 +1,12 @@ #pragma once -#include "Storages/MergeTree/IDataPartStorage.h" +#include +#include #include #include #include #include +#include namespace DB { @@ -13,7 +15,8 @@ class IMergedBlockOutputStream { public: IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_); @@ -39,11 +42,13 @@ protected: SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums); - const MergeTreeData & storage; + MergeTreeSettingsPtr storage_settings; + LoggerPtr log; + StorageMetadataPtr metadata_snapshot; MutableDataPartStoragePtr data_part_storage; - IMergeTreeDataPart::MergeTreeWriterPtr writer; + MergeTreeDataPartWriterPtr writer; bool reset_columns = false; SerializationInfoByName new_serialization_infos; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index b685a9e2a0f..f8cf19120c7 100644 --- 
a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -411,7 +411,9 @@ const KeyCondition::AtomMap KeyCondition::atom_map return false; const String & expression = value.get(); - // This optimization can't process alternation - this would require a comprehensive parsing of regular expression. + + /// This optimization can't process alternation - this would require + /// a comprehensive parsing of regular expression. if (expression.contains('|')) return false; @@ -453,6 +455,18 @@ const KeyCondition::AtomMap KeyCondition::atom_map } }; +static const std::set always_relaxed_atom_functions = {"match"}; +static const std::set always_relaxed_atom_elements + = {KeyCondition::RPNElement::FUNCTION_UNKNOWN, KeyCondition::RPNElement::FUNCTION_ARGS_IN_HYPERRECTANGLE}; + +/// Functions with range inversion cannot be relaxed. It will become stricter instead. +/// For example: +/// create table test(d Date, k Int64, s String) Engine=MergeTree order by toYYYYMM(d); +/// insert into test values ('2020-01-01', 1, ''); +/// insert into test values ('2020-01-02', 1, ''); +/// select * from test where d != '2020-01-01'; -- If relaxed, no record will return +static const std::set no_relaxed_atom_functions + = {"notLike", "notIn", "globalNotIn", "notNullIn", "globalNotNullIn", "notEquals", "notEmpty"}; static const std::map inverse_relations = { @@ -767,12 +781,10 @@ KeyCondition::KeyCondition( ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, - bool single_point_, - bool strict_) + bool single_point_) : key_expr(key_expr_) , key_subexpr_names(getAllSubexpressionNames(*key_expr)) , single_point(single_point_) - , strict(strict_) { size_t key_index = 0; for (const auto & name : key_column_names) @@ -791,6 +803,7 @@ KeyCondition::KeyCondition( if (!filter_dag) { has_filter = false; + relaxed = true; rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); return; } @@ -817,6 +830,9 @@ KeyCondition::KeyCondition( rpn = std::move(builder).extractRPN(); findHyperrectanglesForArgumentsOfSpaceFillingCurves(); + + if (std::any_of(rpn.begin(), rpn.end(), [&](const auto & elem) { return always_relaxed_atom_elements.contains(elem.function); })) + relaxed = true; } bool KeyCondition::addCondition(const String & column, const Range & range) @@ -1180,13 +1196,13 @@ bool KeyCondition::tryPrepareSetIndex( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; std::optional key_space_filling_curve_argument_pos; - if (isKeyPossiblyWrappedByMonotonicFunctions(node, index_mapping.key_index, key_space_filling_curve_argument_pos, data_type, index_mapping.functions) + if (isKeyPossiblyWrappedByMonotonicFunctions( + node, index_mapping.key_index, key_space_filling_curve_argument_pos, data_type, index_mapping.functions) && !key_space_filling_curve_argument_pos) /// We don't support the analysis of space-filling curves and IN set. { indexes_mapping.push_back(index_mapping); data_types.push_back(data_type); - if (out_key_column_num < index_mapping.key_index) - out_key_column_num = index_mapping.key_index; + out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); } }; @@ -1225,10 +1241,6 @@ bool KeyCondition::tryPrepareSetIndex( size_t set_types_size = set_types.size(); size_t indexes_mapping_size = indexes_mapping.size(); - /// When doing strict matches, we have to check all elements in set. 
- if (strict && indexes_mapping_size < set_types_size) - return false; - for (auto & index_mapping : indexes_mapping) if (index_mapping.tuple_index >= set_types_size) return false; @@ -1307,6 +1319,13 @@ bool KeyCondition::tryPrepareSetIndex( } out.set_index = std::make_shared(set_columns, std::move(indexes_mapping)); + + /// When not all key columns are used or when there are multiple elements in + /// the set, the atom's hyperrectangle is expanded to encompass the missing + /// dimensions and any "gaps". + if (indexes_mapping_size < set_types_size || out.set_index->size() > 1) + relaxed = true; + return true; } @@ -1391,7 +1410,8 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( size_t & out_key_column_num, std::optional & out_argument_num_of_space_filling_curve, DataTypePtr & out_key_res_column_type, - MonotonicFunctionsChain & out_functions_chain) + MonotonicFunctionsChain & out_functions_chain, + bool assume_function_monotonicity) { std::vector chain_not_tested_for_monotonicity; DataTypePtr key_column_type; @@ -1434,8 +1454,7 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( arguments.push_back({ nullptr, key_column_type, "" }); auto func = func_builder->build(arguments); - /// If we know the given range only contains one value, then we treat all functions as positive monotonic. - if (!func || (!single_point && !func->hasInformationAboutMonotonicity())) + if (!func || !func->isDeterministicInScopeOfQuery() || (!assume_function_monotonicity && !func->hasInformationAboutMonotonicity())) return false; key_column_type = func->getResultType(); @@ -1602,6 +1621,10 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme if (atom_map.find(func_name) == std::end(atom_map)) return false; + if (always_relaxed_atom_functions.contains(func_name)) + relaxed = true; + + bool allow_constant_transformation = !no_relaxed_atom_functions.contains(func_name); if (num_args == 1) { if (!(isKeyPossiblyWrappedByMonotonicFunctions( @@ -1617,23 +1640,6 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme bool is_set_const = false; bool is_constant_transformed = false; - /// We don't look for inverted key transformations when strict is true, which is required for trivial count(). - /// Consider the following test case: - /// - /// create table test1(p DateTime, k int) engine MergeTree partition by toDate(p) order by k; - /// insert into test1 values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3); - /// select count() from test1 where p > toDateTime('2020-09-01 10:00:00'); - /// - /// toDate(DateTime) is always monotonic, but we cannot relax the predicates to be - /// >= toDate(toDateTime('2020-09-01 10:00:00')), which returns 3 instead of the right count: 2. - bool strict_condition = strict; - - /// If we use this key condition to prune partitions by single value, we cannot relax conditions for NOT. 
- if (single_point - && (func_name == "notLike" || func_name == "notIn" || func_name == "globalNotIn" || func_name == "notNullIn" - || func_name == "globalNotNullIn" || func_name == "notEquals" || func_name == "notEmpty")) - strict_condition = true; - if (functionIsInOrGlobalInOperator(func_name)) { if (tryPrepareSetIndex(func, out, key_column_num)) @@ -1654,19 +1660,25 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme } if (isKeyPossiblyWrappedByMonotonicFunctions( - func.getArgumentAt(0), key_column_num, argument_num_of_space_filling_curve, key_expr_type, chain)) + func.getArgumentAt(0), + key_column_num, + argument_num_of_space_filling_curve, + key_expr_type, + chain, + single_point && func_name == "equals")) { key_arg_pos = 0; } else if ( - !strict_condition - && canConstantBeWrappedByMonotonicFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, const_value, const_type)) + allow_constant_transformation + && canConstantBeWrappedByMonotonicFunctions( + func.getArgumentAt(0), key_column_num, key_expr_type, const_value, const_type)) { key_arg_pos = 0; is_constant_transformed = true; } else if ( - single_point && func_name == "equals" && !strict_condition + single_point && func_name == "equals" && canConstantBeWrappedByFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, const_value, const_type)) { key_arg_pos = 0; @@ -1685,19 +1697,25 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme } if (isKeyPossiblyWrappedByMonotonicFunctions( - func.getArgumentAt(1), key_column_num, argument_num_of_space_filling_curve, key_expr_type, chain)) + func.getArgumentAt(1), + key_column_num, + argument_num_of_space_filling_curve, + key_expr_type, + chain, + single_point && func_name == "equals")) { key_arg_pos = 1; } else if ( - !strict_condition - && canConstantBeWrappedByMonotonicFunctions(func.getArgumentAt(1), key_column_num, key_expr_type, const_value, const_type)) + allow_constant_transformation + && canConstantBeWrappedByMonotonicFunctions( + func.getArgumentAt(1), key_column_num, key_expr_type, const_value, const_type)) { key_arg_pos = 1; is_constant_transformed = true; } else if ( - single_point && func_name == "equals" && !strict_condition + single_point && func_name == "equals" && canConstantBeWrappedByFunctions(func.getArgumentAt(1), key_column_num, key_expr_type, const_value, const_type)) { key_arg_pos = 0; @@ -1797,6 +1815,8 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme func_name = "lessOrEquals"; else if (func_name == "greater") func_name = "greaterOrEquals"; + + relaxed = true; } } @@ -1945,7 +1965,7 @@ KeyCondition::Description KeyCondition::getDescription() const /// Build and optimize it simultaneously. struct Node { - enum class Type + enum class Type : uint8_t { /// Leaf, which is RPNElement. Leaf, @@ -2207,7 +2227,7 @@ KeyCondition::Description KeyCondition::getDescription() const */ /** For the range between tuples, determined by left_keys, left_bounded, right_keys, right_bounded, - * invoke the callback on every parallelogram composing this range (see the description above), + * invoke the callback on every hyperrectangle composing this range (see the description above), * and returns the OR of the callback results (meaning if callback returned true on any part of the range). 
*/ template @@ -2278,13 +2298,10 @@ static BoolMask forAnyHyperrectangle( hyperrectangle[i] = Range::createWholeUniverseWithoutNull(); } - BoolMask result = initial_mask; - result = result | callback(hyperrectangle); + auto result = BoolMask::combine(initial_mask, callback(hyperrectangle)); /// There are several early-exit conditions (like the one below) hereinafter. - /// They are important; in particular, if initial_mask == BoolMask::consider_only_can_be_true - /// (which happens when this routine is called from KeyCondition::mayBeTrueXXX), - /// they provide significant speedup, which may be observed on merge_tree_huge_pk performance test. + /// They provide significant speedup, which may be observed on merge_tree_huge_pk performance test. if (result.isComplete()) return result; @@ -2293,9 +2310,11 @@ static BoolMask forAnyHyperrectangle( if (left_bounded) { hyperrectangle[prefix_size] = Range(left_keys[prefix_size]); - result = result - | forAnyHyperrectangle( - key_size, left_keys, right_keys, true, false, hyperrectangle, data_types, prefix_size + 1, initial_mask, callback); + result = BoolMask::combine( + result, + forAnyHyperrectangle( + key_size, left_keys, right_keys, true, false, hyperrectangle, data_types, prefix_size + 1, initial_mask, callback)); + if (result.isComplete()) return result; } @@ -2305,11 +2324,10 @@ static BoolMask forAnyHyperrectangle( if (right_bounded) { hyperrectangle[prefix_size] = Range(right_keys[prefix_size]); - result = result - | forAnyHyperrectangle( - key_size, left_keys, right_keys, false, true, hyperrectangle, data_types, prefix_size + 1, initial_mask, callback); - if (result.isComplete()) - return result; + result = BoolMask::combine( + result, + forAnyHyperrectangle( + key_size, left_keys, right_keys, false, true, hyperrectangle, data_types, prefix_size + 1, initial_mask, callback)); } return result; @@ -2334,14 +2352,14 @@ BoolMask KeyCondition::checkInRange( key_ranges.push_back(Range::createWholeUniverseWithoutNull()); } -/* std::cerr << "Checking for: ["; - for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_keys[i]); - std::cerr << " ... "; + // std::cerr << "Checking for: ["; + // for (size_t i = 0; i != used_key_size; ++i) + // std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_keys[i]); + // std::cerr << " ... "; - for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_keys[i]); - std::cerr << "]\n";*/ + // for (size_t i = 0; i != used_key_size; ++i) + // std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_keys[i]); + // std::cerr << "]" << ": " << initial_mask.can_be_true << " : " << initial_mask.can_be_false << "\n"; return forAnyHyperrectangle(used_key_size, left_keys, right_keys, true, true, key_ranges, data_types, 0, initial_mask, [&] (const Hyperrectangle & key_ranges_hyperrectangle) @@ -2351,7 +2369,7 @@ BoolMask KeyCondition::checkInRange( // std::cerr << "Hyperrectangle: "; // for (size_t i = 0, size = key_ranges.size(); i != size; ++i) // std::cerr << (i != 0 ? 
" × " : "") << key_ranges[i].toString(); - // std::cerr << ": " << res.can_be_true << "\n"; + // std::cerr << ": " << res.can_be_true << " : " << res.can_be_false << "\n"; return res; }); @@ -2480,7 +2498,7 @@ bool KeyCondition::matchesExactContinuousRange() const bool KeyCondition::extractPlainRanges(Ranges & ranges) const { - if (key_indices.empty() || key_indices.size() > 1) + if (key_indices.size() != 1) return false; if (hasMonotonicFunctionsChain()) @@ -2638,10 +2656,7 @@ bool KeyCondition::extractPlainRanges(Ranges & ranges) const if (rpn_stack.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::extractPlainRanges"); - for (auto & r : rpn_stack.top().ranges) - { - ranges.push_back(std::move(r)); - } + ranges = std::move(rpn_stack.top().ranges); return true; } @@ -2665,6 +2680,13 @@ BoolMask KeyCondition::checkInHyperrectangle( else if (element.function == RPNElement::FUNCTION_IN_RANGE || element.function == RPNElement::FUNCTION_NOT_IN_RANGE) { + if (element.key_column >= hyperrectangle.size()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Hyperrectangle size is {}, but requested element at posittion {} ({})", + hyperrectangle.size(), element.key_column, element.toString()); + } + const Range * key_range = &hyperrectangle[element.key_column]; /// The case when the column is wrapped in a chain of possibly monotonic functions. @@ -2958,8 +2980,6 @@ String KeyCondition::RPNElement::toString(std::string_view column_name, bool pri case ALWAYS_TRUE: return "true"; } - - UNREACHABLE(); } diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 6e248dd664a..2bc3b108e02 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -45,8 +45,7 @@ public: ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr, - bool single_point_ = false, - bool strict_ = false); + bool single_point_ = false); /// Whether the condition and its negation are feasible in the direct product of single column ranges specified by `hyperrectangle`. BoolMask checkInHyperrectangle( @@ -217,6 +216,8 @@ public: const RPN & getRPN() const { return rpn; } const ColumnIndices & getKeyColumns() const { return key_columns; } + bool isRelaxed() const { return relaxed; } + private: BoolMask checkInRange( size_t used_key_size, @@ -228,20 +229,22 @@ private: bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out); - /** Is node the key column, or an argument of a space-filling curve that is a key column, - * or expression in which that column is wrapped by a chain of functions, - * that can be monotonic on certain ranges? - * If these conditions are true, then returns number of column in key, - * optionally the argument position of a space-filling curve, - * type of resulting expression - * and fills chain of possibly-monotonic functions. - */ + /// Is node the key column, or an argument of a space-filling curve that is a key column, + /// or expression in which that column is wrapped by a chain of functions, + /// that can be monotonic on certain ranges? + /// If these conditions are true, then returns number of column in key, + /// optionally the argument position of a space-filling curve, + /// type of resulting expression + /// and fills chain of possibly-monotonic functions. + /// If @assume_function_monotonicity = true, assume all deterministic + /// functions as monotonic, which is useful for partition pruning. 
bool isKeyPossiblyWrappedByMonotonicFunctions( const RPNBuilderTreeNode & node, size_t & out_key_column_num, std::optional & out_argument_num_of_space_filling_curve, DataTypePtr & out_key_res_column_type, - MonotonicFunctionsChain & out_functions_chain); + MonotonicFunctionsChain & out_functions_chain, + bool assume_function_monotonicity = false); bool isKeyPossiblyWrappedByMonotonicFunctionsImpl( const RPNBuilderTreeNode & node, @@ -338,11 +341,63 @@ private: /// Array joined column names NameSet array_joined_column_names; - // If true, always allow key_expr to be wrapped by function + /// If true, this key condition is used only to validate single value + /// ranges. It permits key_expr and the constant of FunctionEquals to be + /// transformed by any deterministic function. It is used by + /// PartitionPruner. bool single_point; - // If true, do not use always_monotonic information to transform constants - bool strict; + /// If true, this key condition is relaxed. When a key condition is relaxed, it + /// is considered weakened. This is because keys may not always align perfectly + /// with the condition specified in the query, and the aim is to enhance the + /// usefulness of different types of key expressions across various scenarios. + /// + /// For instance, in a scenario with one granule of key column toDate(a), where + /// the hyperrectangle is toDate(a) ∊ [x, y], the result of a ∊ [u, v] can be + /// deduced as toDate(a) ∊ [toDate(u), toDate(v)] due to the monotonic + /// non-decreasing nature of the toDate function. Similarly, for a ∊ (u, v), the + /// transformed outcome remains toDate(a) ∊ [toDate(u), toDate(v)], because toDate + /// is monotonic but not strictly increasing, so the open bounds cannot be preserved. This is + /// one of the main use cases of key condition relaxation. + /// + /// During the KeyCondition::checkInRange process, relaxing the key condition + /// can lead to a loosened result. For example, when transitioning from (u, v) + /// to [u, v], if a key is within the range [u, u], BoolMask::can_be_true will + /// be true instead of false, causing us to not skip this granule. This behavior + /// is acceptable as we can still filter it later on. Conversely, if the key is + /// within the range [u, v], BoolMask::can_be_false will be false instead of + /// true, indicating a stricter condition where all elements of the granule + /// satisfy the key condition. Hence, when the key condition is relaxed, we + /// cannot rely on BoolMask::can_be_false. One significant use case of + /// BoolMask::can_be_false is in trivial count optimization. + /// + /// Now let's review all the cases of key condition relaxation across different + /// atom types. + /// + /// 1. Not applicable: ALWAYS_FALSE, ALWAYS_TRUE, FUNCTION_NOT, + /// FUNCTION_AND, FUNCTION_OR. + /// + /// These atoms are either never relaxed or are relaxed by their children. + /// + /// 2. Constant transformed: FUNCTION_IN_RANGE, FUNCTION_NOT_IN_RANGE, + /// FUNCTION_IS_NULL, FUNCTION_IS_NOT_NULL, FUNCTION_IN_SET (1 element), + /// FUNCTION_NOT_IN_SET (1 element) + /// + /// These atoms are relaxed only when the associated constants undergo + /// transformation by monotonic functions, as illustrated in the example + /// mentioned earlier. + /// + /// 3. 
Always relaxed: FUNCTION_UNKNOWN, FUNCTION_IN_SET (>1 elements), + /// FUNCTION_NOT_IN_SET (>1 elements), FUNCTION_ARGS_IN_HYPERRECTANGLE + /// + /// These atoms are always considered relaxed for the sake of implementation + /// simplicity, as there may be "gaps" within the atom's hyperrectangle that the + /// granule's hyperrectangle may or may not intersect. + /// + /// NOTE: we also need to examine special functions that generate atoms. For + /// example, the `match` function can produce a FUNCTION_IN_RANGE atom based + /// on a given regular expression, which is relaxed for simplicity. + bool relaxed = false; }; String extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool requires_perfect_prefix); diff --git a/src/Storages/MergeTree/LevelMergeSelector.cpp b/src/Storages/MergeTree/LevelMergeSelector.cpp deleted file mode 100644 index 67495c1372d..00000000000 --- a/src/Storages/MergeTree/LevelMergeSelector.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include - -#include - - -namespace DB -{ - -namespace -{ - -/** Estimates best set of parts to merge within passed alternatives. - * It is selected simply: by minimal size. - */ -struct Estimator -{ - using Iterator = LevelMergeSelector::PartsRange::const_iterator; - - void consider(Iterator begin, Iterator end, size_t sum_size) - { - double current_score = sum_size; - - if (min_score == 0.0 || current_score < min_score) - { - min_score = current_score; - best_begin = begin; - best_end = end; - } - } - - LevelMergeSelector::PartsRange getBest() const - { - return LevelMergeSelector::PartsRange(best_begin, best_end); - } - - double min_score = 0.0; - Iterator best_begin {}; - Iterator best_end {}; -}; - - -void selectWithinPartition( - const LevelMergeSelector::PartsRange & parts, - const size_t max_total_size_to_merge, - Estimator & estimator, - const LevelMergeSelector::Settings & settings) -{ - size_t parts_size = parts.size(); - if (parts_size < settings.parts_to_merge) - return; - - /// To easily calculate sum size in any range. - size_t parts_count = parts.size(); - size_t prefix_sum = 0; - std::vector prefix_sums(parts.size() + 1); - - for (size_t i = 0; i < parts_count; ++i) - { - prefix_sum += parts[i].size; - prefix_sums[i + 1] = prefix_sum; - } - - /// Use "corrected" level. It will be non-decreasing while traversing parts right to left. - /// This is done for compatibility with another algorithms. - size_t corrected_level_at_left = 0; - size_t corrected_level_at_right = 0; - - size_t range_end = parts_size; - size_t range_begin = range_end - settings.parts_to_merge; - - for (size_t i = range_begin; i < range_end; ++i) - if (corrected_level_at_left < parts[i].level) - corrected_level_at_left = parts[i].level; - - while (true) - { - if (corrected_level_at_left < parts[range_begin].level) - corrected_level_at_left = parts[range_begin].level; - - if (corrected_level_at_right < parts[range_end - 1].level) - corrected_level_at_right = parts[range_end - 1].level; - - /// Leftmost range of same corrected level. - if (corrected_level_at_left == corrected_level_at_right - && (range_begin == 0 || parts[range_begin - 1].level > corrected_level_at_left)) - { - size_t range_size = prefix_sums[range_end] - prefix_sums[range_begin]; - - if (range_size <= max_total_size_to_merge) - estimator.consider(parts.begin() + range_begin, parts.begin() + range_end, range_size); - - break; /// Minimum level is enough. 
- } - - if (range_begin == 0) - break; - - --range_begin; - --range_end; - } -} - -} - - -LevelMergeSelector::PartsRange LevelMergeSelector::select( - const PartsRanges & parts_ranges, - size_t max_total_size_to_merge) -{ - Estimator estimator; - - for (const auto & parts_range: parts_ranges) - selectWithinPartition(parts_range, max_total_size_to_merge, estimator, settings); - - return estimator.getBest(); -} - -} diff --git a/src/Storages/MergeTree/LevelMergeSelector.h b/src/Storages/MergeTree/LevelMergeSelector.h deleted file mode 100644 index f4080c379c4..00000000000 --- a/src/Storages/MergeTree/LevelMergeSelector.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/** Select parts to merge based on its level. - * Select first range of parts of parts_to_merge length with minimum level. - */ -class LevelMergeSelector : public IMergeSelector -{ -public: - struct Settings - { - size_t parts_to_merge = 10; - }; - - explicit LevelMergeSelector(const Settings & settings_) : settings(settings_) {} - - PartsRange select( - const PartsRanges & parts_ranges, - size_t max_total_size_to_merge) override; - -private: - const Settings settings; -}; - -} diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index bd8546f04cc..c6e98b4e5a1 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -81,6 +81,11 @@ size_t MarkRanges::getNumberOfMarks() const return result; } +bool MarkRanges::isOneRangeForWholePart(size_t num_marks_in_part) const +{ + return size() == 1 && front().begin == 0 && front().end == num_marks_in_part; +} + void MarkRanges::serialize(WriteBuffer & out) const { writeBinaryLittleEndian(this->size(), out); diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index 1d9d0a1e27e..626d4e9e689 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -36,6 +35,7 @@ struct MarkRanges : public std::deque using std::deque::deque; /// NOLINT(modernize-type-traits) size_t getNumberOfMarks() const; + bool isOneRangeForWholePart(size_t num_marks_in_part) const; void serialize(WriteBuffer & out) const; String describe() const; diff --git a/src/Storages/MergeTree/MergeAlgorithm.h b/src/Storages/MergeTree/MergeAlgorithm.h index 9123182b71e..147fc4ea9cb 100644 --- a/src/Storages/MergeTree/MergeAlgorithm.h +++ b/src/Storages/MergeTree/MergeAlgorithm.h @@ -5,7 +5,7 @@ namespace DB { /// Algorithm of Merge. 
-enum class MergeAlgorithm +enum class MergeAlgorithm : uint8_t { Undecided, /// Not running yet Horizontal, /// per-row merge of all columns diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index e8d55f75b08..2db0c0af3d7 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -312,6 +312,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); task_context->setCurrentQueryId(getQueryId()); + task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE); /// Add merge to list merge_mutate_entry = storage.getContext()->getMergeList().insert( diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 866a63911c3..a7070c80df9 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -168,6 +168,7 @@ ContextMutablePtr MergePlainMergeTreeTask::createTaskContext() const context->makeQueryContext(); auto queryId = getQueryId(); context->setCurrentQueryId(queryId); + context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE); return context; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 5cc9c0e50d3..013da902dea 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -53,7 +53,7 @@ private: void prepare(); void finish(); - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_EXECUTE, diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 34e17e40a74..f1f856da3a2 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include #include @@ -34,6 +34,7 @@ #include #include #include +#include #include namespace DB @@ -378,7 +379,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()), ctx->compression_codec, - global_ctx->txn, + global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, ctx->blocks_are_granules_size, global_ctx->context->getWriteSettings()); @@ -535,6 +536,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const std::unique_ptr reread_buf = wbuf_readable ? wbuf_readable->tryGetReadBuffer() : nullptr; if (!reread_buf) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read temporary file {}", ctx->rows_sources_uncompressed_write_buf->getFileName()); + auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); if (!reread_buffer_raw) { @@ -543,7 +545,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const } /// Move ownership from std::unique_ptr to std::unique_ptr for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. 
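The comment just above describes moving ownership of the reread buffer from a std::unique_ptr to the base buffer type into a std::unique_ptr to the concrete type required by CompressedReadBufferFromFile, which is what the patch lines that follow do for reread_buf. A generic, self-contained sketch of the same release-and-rewrap step, with invented Base/Derived/downcastOwnership names rather than the real ClickHouse types:

#include <memory>

struct Base { virtual ~Base() = default; };
struct Derived : Base {};

std::unique_ptr<Derived> downcastOwnership(std::unique_ptr<Base> base)
{
    auto * derived_raw = dynamic_cast<Derived *>(base.get());
    if (!derived_raw)
        return nullptr;                           // not the expected type: `base` keeps (and later frees) the object

    base.release();                               // give up ownership of the raw pointer...
    return std::unique_ptr<Derived>(derived_raw); // ...and re-own the same object under the concrete type
}

Discarding the return value of release() is intentional here, which is also what the widened NOLINT annotation in the patch suppresses.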
- reread_buf.release(); /// NOLINT(bugprone-unused-return-value): we already have the pointer value in `reread_buffer_raw` + reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` /// Then, move ownership to unique_ptr to concrete type. std::unique_ptr reread_buffer_from_file(reread_buffer_raw); /// CompressedReadBufferFromFile expects std::unique_ptr as argument. @@ -555,6 +557,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const ctx->it_name_and_type = global_ctx->gathering_columns.cbegin(); const auto & settings = global_ctx->context->getSettingsRef(); + size_t max_delayed_streams = 0; if (global_ctx->new_data_part->getDataPartStorage().supportParallelWrite()) { @@ -563,20 +566,20 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const else max_delayed_streams = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE; } + ctx->max_delayed_streams = max_delayed_streams; + bool all_parts_on_remote_disks = std::ranges::all_of(global_ctx->future_part->parts, [](const auto & part) { return part->isStoredOnRemoteDisk(); }); + ctx->use_prefetch = all_parts_on_remote_disks && global_ctx->data->getSettings()->vertical_merge_remote_filesystem_prefetch; + + if (ctx->use_prefetch && ctx->it_name_and_type != global_ctx->gathering_columns.end()) + ctx->prepared_pipe = createPipeForReadingOneColumn(ctx->it_name_and_type->name); + return false; } -void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const +Pipe MergeTask::VerticalMergeStage::createPipeForReadingOneColumn(const String & column_name) const { - const auto & [column_name, column_type] = *ctx->it_name_and_type; - Names column_names{column_name}; - - ctx->progress_before = global_ctx->merge_list_element_ptr->progress.load(std::memory_order_relaxed); - - global_ctx->column_progress = std::make_unique(ctx->progress_before, ctx->column_sizes->columnWeight(column_name)); - Pipes pipes; for (size_t part_num = 0; part_num < global_ctx->future_part->parts.size(); ++part_num) { @@ -585,20 +588,42 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const *global_ctx->data, global_ctx->storage_snapshot, global_ctx->future_part->parts[part_num], - column_names, + Names{column_name}, /*mark_ranges=*/ {}, + global_ctx->input_rows_filtered, /*apply_deleted_mask=*/ true, ctx->read_with_direct_io, - /*take_column_types_from_storage=*/ true, - /*quiet=*/ false, - global_ctx->input_rows_filtered); + ctx->use_prefetch); pipes.emplace_back(std::move(pipe)); } - auto pipe = Pipe::unitePipes(std::move(pipes)); + return Pipe::unitePipes(std::move(pipes)); +} + +void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const +{ + const auto & column_name = ctx->it_name_and_type->name; + + ctx->progress_before = global_ctx->merge_list_element_ptr->progress.load(std::memory_order_relaxed); + global_ctx->column_progress = std::make_unique(ctx->progress_before, ctx->column_sizes->columnWeight(column_name)); + + Pipe pipe; + if (ctx->prepared_pipe) + { + pipe = std::move(*ctx->prepared_pipe); + + auto next_column_it = std::next(ctx->it_name_and_type); + if (next_column_it != global_ctx->gathering_columns.end()) + ctx->prepared_pipe = createPipeForReadingOneColumn(next_column_it->name); + } + else + { + pipe = createPipeForReadingOneColumn(column_name); + } ctx->rows_sources_read_buf->seek(0, 0); + bool is_result_sparse = 
global_ctx->new_data_part->getSerialization(column_name)->getKind() == ISerialization::Kind::SPARSE; const auto data_settings = global_ctx->data->getSettings(); auto transform = std::make_unique( @@ -606,7 +631,8 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const pipe.numOutputPorts(), *ctx->rows_sources_read_buf, data_settings->merge_max_block_size, - data_settings->merge_max_block_size_bytes); + data_settings->merge_max_block_size_bytes, + is_result_sparse); pipe.addTransform(std::move(transform)); @@ -952,11 +978,10 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() part, global_ctx->merging_column_names, /*mark_ranges=*/ {}, + global_ctx->input_rows_filtered, /*apply_deleted_mask=*/ true, ctx->read_with_direct_io, - /*take_column_types_from_storage=*/ true, - /*quiet=*/ false, - global_ctx->input_rows_filtered); + /*prefetch=*/ false); if (global_ctx->metadata_snapshot->hasSortingKey()) { diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index f6268886b14..1294fa30449 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -289,7 +289,7 @@ private: bool need_sync{false}; /// End dependencies from previous stages - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_EXECUTE, @@ -299,7 +299,9 @@ private: Float64 progress_before = 0; std::unique_ptr column_to{nullptr}; + std::optional prepared_pipe; size_t max_delayed_streams = 0; + bool use_prefetch = false; std::list> delayed_streams; size_t column_elems_written{0}; QueryPipeline column_parts_pipeline; @@ -340,6 +342,8 @@ private: bool executeVerticalMergeForOneColumn() const; void finalizeVerticalMergeForOneColumn() const; + Pipe createPipeForReadingOneColumn(const String & column_name) const; + VerticalMergeRuntimeContextPtr ctx; GlobalRuntimeContextPtr global_ctx; }; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index b19c42c8db8..c1514416301 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -41,13 +41,13 @@ struct MergeTreeBlockSizePredictor void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = calculateDecay()); /// Return current block size (after update()) - inline size_t getBlockSize() const + size_t getBlockSize() const { return block_size_bytes; } /// Predicts what number of rows should be read to exhaust byte quota per column - inline size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const + size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const { double max_size_per_row = std::max(std::max(max_size_per_row_fixed, 1), max_size_per_row_dynamic); return (bytes_quota > block_size_rows * max_size_per_row) @@ -56,14 +56,14 @@ struct MergeTreeBlockSizePredictor } /// Predicts what number of rows should be read to exhaust byte quota per block - inline size_t estimateNumRows(size_t bytes_quota) const + size_t estimateNumRows(size_t bytes_quota) const { return (bytes_quota > block_size_bytes) ? static_cast((bytes_quota - block_size_bytes) / std::max(1, static_cast(bytes_per_row_current))) : 0; } - inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) + void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) { double alpha = std::pow(1. 
- decay, rows_was_read); double current_ration = rows_was_filtered / std::max(1.0, static_cast(rows_was_read)); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9350b24c96a..449b51f9b62 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,7 @@ namespace ErrorCodes extern const int CANNOT_SCHEDULE_TASK; extern const int LIMIT_EXCEEDED; extern const int CANNOT_FORGET_PARTITION; + extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } static void checkSuspiciousIndices(const ASTFunction * index_function) @@ -469,7 +471,8 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const return storage_policy; } -ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr local_context) const +ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate( + const StorageSnapshotPtr & storage_snapshot, const ActionsDAGPtr & filter_dag, ContextPtr local_context) const { if (!local_context->getSettings().allow_statistic_optimize) return {}; @@ -484,7 +487,7 @@ ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate(const SelectQ ASTPtr expression_ast; ConditionEstimator result; - PartitionPruner partition_pruner(storage_snapshot->metadata, query_info, local_context, true /* strict */); + PartitionPruner partition_pruner(storage_snapshot->metadata, filter_dag, local_context); if (partition_pruner.isUseless()) { @@ -1175,8 +1178,6 @@ String MergeTreeData::MergingParams::getModeName() const case Graphite: return "Graphite"; case VersionedCollapsing: return "VersionedCollapsing"; } - - UNREACHABLE(); } Int64 MergeTreeData::getMaxBlockNumber() const @@ -1312,6 +1313,46 @@ static constexpr size_t loading_parts_initial_backoff_ms = 100; static constexpr size_t loading_parts_max_backoff_ms = 5000; static constexpr size_t loading_parts_max_tries = 3; +void MergeTreeData::loadUnexpectedDataPart(UnexpectedPartLoadState & state) +{ + const MergeTreePartInfo & part_info = state.loading_info->info; + const String & part_name = state.loading_info->name; + const DiskPtr & part_disk_ptr = state.loading_info->disk; + LOG_TRACE(log, "Loading unexpected part {} from disk {}", part_name, part_disk_ptr->getName()); + + LoadPartResult res; + auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); + auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, part_name); + String part_path = fs::path(relative_data_path) / part_name; + + try + { + state.part = getDataPartBuilder(part_name, single_disk_volume, part_name) + .withPartInfo(part_info) + .withPartFormatFromDisk() + .build(); + + state.part->loadRowsCountFileForUnexpectedPart(); + } + catch (...) + { + LOG_DEBUG(log, "Failed to load unexpected data part {} with exception: {}", part_name, getExceptionMessage(std::current_exception(), false)); + if (!state.part) + { + /// Build a fake part and mark it as broken in case of filesystem error. + /// If the error impacts part directory instead of single files, + /// an exception will be thrown during detach and silently ignored. 
+ state.part = getDataPartBuilder(part_name, single_disk_volume, part_name) + .withPartStorageType(MergeTreeDataPartStorageType::Full) + .withPartType(MergeTreeDataPartType::Wide) + .build(); + } + + state.is_broken = true; + tryLogCurrentException(log, fmt::format("while loading unexpected part {} on path {}", part_name, part_path)); + } +} + MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( const MergeTreePartInfo & part_info, const String & part_name, @@ -1704,6 +1745,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional parts_to_load_by_disk(disks.size()); + std::vector unexpected_parts_to_load_by_disk(disks.size()); ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); @@ -1714,6 +1756,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), format_version)) - disk_parts.emplace_back(*part_info, it->name(), disk_ptr); + { + if (expected_parts && !expected_parts->contains(it->name())) + unexpected_disk_parts.emplace_back(*part_info, it->name(), disk_ptr); + else + disk_parts.emplace_back(*part_info, it->name(), disk_ptr); + } } }, Priority{0}); } @@ -1736,6 +1784,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional unexpected_unloaded_data_parts; + for (const auto & [info, name, disk] : unexpected_parts_to_load) + { + bool uncovered = true; + for (const auto & part : unexpected_parts_to_load) + { + if (name != part.name && part.info.contains(info)) + { + uncovered = false; + break; + } + } + unexpected_unloaded_data_parts.push_back({std::make_shared(info, name, disk), uncovered, /*is_broken*/ false, /*part*/ nullptr}); + } + + if (!unexpected_unloaded_data_parts.empty()) + { + LOG_DEBUG(log, "Found {} unexpected data parts. 
They will be loaded asynchronously", unexpected_unloaded_data_parts.size()); + { + std::lock_guard lock(unexpected_data_parts_mutex); + unexpected_data_parts = std::move(unexpected_unloaded_data_parts); + unexpected_data_parts_loading_finished = false; + } + + unexpected_data_parts_loading_task = getContext()->getSchedulePool().createTask( + "MergeTreeData::loadUnexpectedDataParts", + [this] { loadUnexpectedDataParts(); }); + } + loading_tree.traverse(/*recursive=*/ true, [&](const auto & node) { if (!node->is_loaded) @@ -1889,6 +1970,54 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts"); + + for (auto & load_state : unexpected_data_parts) + { + std::lock_guard lock(unexpected_data_parts_mutex); + chassert(!load_state.part); + if (unexpected_data_parts_loading_canceled) + { + runner.waitForAllToFinishAndRethrowFirstError(); + return; + } + runner([&]() + { + loadUnexpectedDataPart(load_state); + + chassert(load_state.part); + if (load_state.is_broken) + { + load_state.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes + } + }, Priority{}); + } + runner.waitForAllToFinishAndRethrowFirstError(); + LOG_DEBUG(log, "Loaded {} unexpected data parts", unexpected_data_parts.size()); + + { + std::lock_guard lock(unexpected_data_parts_mutex); + unexpected_data_parts_loading_finished = true; + unexpected_data_parts_cv.notify_all(); + } +} + void MergeTreeData::loadOutdatedDataParts(bool is_async) try { @@ -2024,24 +2153,74 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ LOG_TRACE(log, "Finished waiting for outdated data parts to be loaded"); } -void MergeTreeData::startOutdatedDataPartsLoadingTask() +void MergeTreeData::waitForUnexpectedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ANALYSIS +{ + /// Background tasks are not run if storage is static. + if (isStaticStorage()) + return; + + /// If waiting is not required, do NOT log and do NOT enable/disable turbo mode to make `waitForUnexpectedPartsToBeLoaded` a lightweight check + { + std::unique_lock lock(unexpected_data_parts_mutex); + if (unexpected_data_parts_loading_canceled) + throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of unexpected data parts was already canceled"); + if (unexpected_data_parts_loading_finished) + return; + } + + /// We need to load parts as fast as possible + getUnexpectedPartsLoadingThreadPool().enableTurboMode(); + SCOPE_EXIT({ + /// Let's lower the number of threads e.g. 
for later ATTACH queries to behave as usual + getUnexpectedPartsLoadingThreadPool().disableTurboMode(); + }); + + LOG_TRACE(log, "Will wait for unexpected data parts to be loaded"); + + std::unique_lock lock(unexpected_data_parts_mutex); + + unexpected_data_parts_cv.wait(lock, [this]() TSA_NO_THREAD_SAFETY_ANALYSIS + { + return unexpected_data_parts_loading_finished || unexpected_data_parts_loading_canceled; + }); + + if (unexpected_data_parts_loading_canceled) + throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of unexpected data parts was canceled"); + + LOG_TRACE(log, "Finished waiting for unexpected data parts to be loaded"); +} + +void MergeTreeData::startOutdatedAndUnexpectedDataPartsLoadingTask() { if (outdated_data_parts_loading_task) outdated_data_parts_loading_task->activateAndSchedule(); + if (unexpected_data_parts_loading_task) + unexpected_data_parts_loading_task->activateAndSchedule(); } -void MergeTreeData::stopOutdatedDataPartsLoadingTask() +void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask() { - if (!outdated_data_parts_loading_task) - return; - + if (outdated_data_parts_loading_task) { - std::lock_guard lock(outdated_data_parts_mutex); - outdated_data_parts_loading_canceled = true; + { + std::lock_guard lock(outdated_data_parts_mutex); + outdated_data_parts_loading_canceled = true; + } + + outdated_data_parts_loading_task->deactivate(); + outdated_data_parts_cv.notify_all(); } - outdated_data_parts_loading_task->deactivate(); - outdated_data_parts_cv.notify_all(); + if (unexpected_data_parts_loading_task) + { + { + std::lock_guard lock(unexpected_data_parts_mutex); + unexpected_data_parts_loading_canceled = true; + } + + unexpected_data_parts_loading_task->deactivate(); + unexpected_data_parts_cv.notify_all(); + } } /// Is the part directory old. @@ -3008,13 +3187,16 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context commands.apply(new_metadata, local_context); - if (AlterCommands::hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index) + if (AlterCommands::hasFullTextIndex(new_metadata) && !settings.allow_experimental_full_text_index) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); + "Experimental full-text index feature is not enabled (turn on setting 'allow_experimental_full_text_index')"); for (const auto & disk : getDisks()) - if (!disk->supportsHardLinks()) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ALTER TABLE is not supported for immutable disk '{}'", disk->getName()); + if (!disk->supportsHardLinks() && !commands.isSettingsAlter() && !commands.isCommentAlter()) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "ALTER TABLE commands are not supported on immutable disk '{}', except for setting and comment alteration", + disk->getName()); /// Set of columns that shouldn't be altered. 
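waitForUnexpectedPartsToBeLoaded() and stopOutdatedAndUnexpectedDataPartsLoadingTask() above coordinate through a mutex, a condition variable and two flags (finished / canceled). The following is a minimal, self-contained sketch of that wait-or-cancel pattern; AsyncLoadingState, markFinished and markCanceled are hypothetical names standing in for the actual MergeTreeData members.

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <thread>

class AsyncLoadingState
{
public:
    /// Blocks until loading either finishes or is canceled; cancellation is reported as an exception.
    void waitLoaded()
    {
        std::unique_lock lock(mutex);
        cv.wait(lock, [this] { return finished || canceled; });
        if (canceled)
            throw std::runtime_error("Loading of unexpected data parts was canceled");
    }

    void markFinished()
    {
        {
            std::lock_guard lock(mutex);
            finished = true;
        }
        cv.notify_all();
    }

    void markCanceled()
    {
        {
            std::lock_guard lock(mutex);
            canceled = true;
        }
        cv.notify_all();
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    bool finished = false;
    bool canceled = false;
};

int main()
{
    AsyncLoadingState state;
    std::thread loader([&] { state.markFinished(); });  /// background loading task
    state.waitLoaded();                                  /// query thread waiting for the parts
    loader.join();
    std::cout << "unexpected parts loaded\n";
}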
NameSet columns_alter_type_forbidden; @@ -3692,7 +3874,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts continue; auto storage_column = columns.getPhysical(part_column.name); - if (!storage_column.type->hasDynamicSubcolumns()) + if (!storage_column.type->hasDynamicSubcolumnsDeprecated()) continue; auto concrete_storage_column = object_columns.getPhysical(part_column.name); @@ -4101,16 +4283,13 @@ void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock); } -void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) +void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix) { if (prefix.empty()) LOG_INFO(log, "Renaming {} to {} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); else LOG_INFO(log, "Renaming {} to {}_{} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name); - if (restore_covered) - waitForOutdatedPartsToBeLoaded(); - auto lock = lockParts(); bool removed_active_part = false; bool restored_active_part = false; @@ -4136,132 +4315,6 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState()); data_parts_indexes.erase(it_part); - if (restore_covered && part->info.level == 0 && part->info.mutation == 0) - { - LOG_WARNING(log, "Will not recover parts covered by zero-level part {}", part->name); - return; - } - - /// Let's restore some parts covered by unexpected to avoid partial data - if (restore_covered) - { - Strings restored; - Strings error_parts; - - auto is_appropriate_state = [] (const DataPartPtr & part_) - { - /// In rare cases, we may have a chain of unexpected parts that cover common source parts, e.g. all_1_2_3, all_1_3_4 - /// It may happen as a result of interrupted cloneReplica - bool already_active = part_->getState() == DataPartState::Active; - if (!already_active && part_->getState() != DataPartState::Outdated) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to restore a part {} from unexpected state: {}", part_->name, part_->getState()); - return !already_active; - }; - - auto activate_part = [this, &restored_active_part](auto it) - { - /// It's not clear what to do if we try to activate part that was removed in transaction. - /// It may happen only in ReplicatedMergeTree, so let's simply throw LOGICAL_ERROR for now. - chassert((*it)->version.isRemovalTIDLocked()); - if ((*it)->version.removal_tid_lock == Tx::PrehistoricTID.getHash()) - (*it)->version.unlockRemovalTID(Tx::PrehistoricTID, TransactionInfoContext{getStorageID(), (*it)->name}); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot activate part {} that was removed by transaction ({})", - (*it)->name, (*it)->version.removal_tid_lock); - - addPartContributionToColumnAndSecondaryIndexSizes(*it); - addPartContributionToDataVolume(*it); - modifyPartState(it, DataPartState::Active); /// iterator is not invalidated here - restored_active_part = true; - }; - - /// ActiveDataPartSet allows to restore most top-level parts instead of unexpected. - /// It can be important in case of assigned merges. 
If unexpected part is result of some - /// finished, but not committed merge then we should restore (at least try to restore) - /// closest ancestors for the unexpected part to be able to execute it. - /// However it's not guaranteed because outdated parts can intersect - ActiveDataPartSet parts_for_replacement(format_version); - auto range = getDataPartsPartitionRange(part->info.partition_id); - DataPartsVector parts_candidates(range.begin(), range.end()); - - /// In case of intersecting outdated parts we want to add bigger parts (with higher level) first - auto comparator = [] (const DataPartPtr left, const DataPartPtr right) -> bool - { - if (left->info.level < right->info.level) - return true; - else if (left->info.level > right->info.level) - return false; - else - return left->info.mutation < right->info.mutation; - }; - std::sort(parts_candidates.begin(), parts_candidates.end(), comparator); - /// From larger to smaller parts - for (const auto & part_candidate_in_partition : parts_candidates | std::views::reverse) - { - if (part->info.contains(part_candidate_in_partition->info) - && is_appropriate_state(part_candidate_in_partition)) - { - String out_reason; - /// Outdated parts can itersect legally (because of DROP_PART) here it's okay, we - /// are trying to do out best to restore covered parts. - auto outcome = parts_for_replacement.tryAddPart(part_candidate_in_partition->info, &out_reason); - if (outcome == ActiveDataPartSet::AddPartOutcome::HasIntersectingPart) - { - error_parts.push_back(part->name); - LOG_ERROR(log, "Failed to restore part {}, because of intersection reason '{}'", part->name, out_reason); - } - } - } - - if (parts_for_replacement.size() > 0) - { - std::vector> holes_list; - /// Most part of the code below is just to write pretty message - auto part_infos = parts_for_replacement.getPartInfos(); - int64_t current_right_block = part_infos[0].min_block; - for (const auto & top_level_part_to_replace : part_infos) - { - auto data_part_it = data_parts_by_info.find(top_level_part_to_replace); - if (data_part_it == data_parts_by_info.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find part {} in own set", top_level_part_to_replace.getPartNameForLogs()); - activate_part(data_part_it); - restored.push_back((*data_part_it)->name); - if (top_level_part_to_replace.min_block - current_right_block > 1) - holes_list.emplace_back(current_right_block, top_level_part_to_replace.min_block); - current_right_block = top_level_part_to_replace.max_block; - } - if (part->info.max_block != current_right_block) - holes_list.emplace_back(current_right_block, part->info.max_block); - - for (const String & name : restored) - LOG_INFO(log, "Activated part {} in place of unexpected {}", name, part->name); - - if (!error_parts.empty() || !holes_list.empty()) - { - std::string error_parts_message, holes_list_message; - if (!error_parts.empty()) - error_parts_message = fmt::format(" Parts failed to restore because of intersection: [{}]", fmt::join(error_parts, ", ")); - if (!holes_list.empty()) - { - if (!error_parts.empty()) - holes_list_message = "."; - - Strings holes_list_pairs; - for (const auto & [left_side, right_side] : holes_list) - holes_list_pairs.push_back(fmt::format("({}, {})", left_side + 1, right_side - 1)); - holes_list_message += fmt::format(" Block ranges failed to restore: [{}]", fmt::join(holes_list_pairs, ", ")); - } - LOG_WARNING(log, "The set of parts restored in place of {} looks incomplete. 
" - "SELECT queries may observe gaps in data until this replica is synchronized with other replicas.{}{}", - part->name, error_parts_message, holes_list_message); - } - } - else - { - LOG_INFO(log, "Don't find any parts for replacement instead of unexpected {}", part->name); - } - } - if (removed_active_part || restored_active_part) resetObjectColumnsFromActiveParts(lock); } @@ -5074,7 +5127,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String auto volume = getStoragePolicy()->getVolumeByName(name); if (!volume) - throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exists on policy {}", name, getStoragePolicy()->getName()); + throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exist on policy {}", name, getStoragePolicy()->getName()); if (parts.empty()) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Nothing to move (check that the partition exists)."); @@ -5361,20 +5414,50 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( &temp_dirs, false, false); - auto projection_parts = part->getProjectionParts(); - for (const auto & [projection_name, projection_part] : projection_parts) + auto backup_projection = [&](IDataPartStorage & storage, IMergeTreeDataPart & projection_part) { - projection_part->getDataPartStorage().backup( - projection_part->checksums, - projection_part->getFileNamesWithoutChecksums(), + storage.backup( + projection_part.checksums, + projection_part.getFileNamesWithoutChecksums(), fs::path{data_path_in_backup} / part->name, backup_settings, read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs, - projection_part->is_broken, + projection_part.is_broken, backup_settings.allow_backup_broken_projections); + }; + + auto projection_parts = part->getProjectionParts(); + std::string proj_suffix = ".proj"; + std::unordered_set defined_projections; + + for (const auto & [projection_name, projection_part] : projection_parts) + { + defined_projections.emplace(projection_name); + backup_projection(projection_part->getDataPartStorage(), *projection_part); + } + + /// It is possible that the part has a written but not loaded projection, + /// e.g. it is written to parent part's checksums.txt and exists on disk, + /// but does not exist in table's projections definition. + /// Such a part can appear server was restarted after DROP PROJECTION but before old part was removed. + /// In this case, the old part will load only projections from metadata. + /// See 031145_non_loaded_projection_backup.sh. 
+ for (const auto & [name, _] : part->checksums.files) + { + auto projection_name = fs::path(name).stem().string(); + if (endsWith(name, proj_suffix) && !defined_projections.contains(projection_name)) + { + auto projection_storage = part->getDataPartStorage().getProjection(projection_name + proj_suffix); + if (projection_storage->exists("checksums.txt")) + { + auto projection_part = const_cast(*part).getProjectionPartBuilder( + projection_name, /* is_temp_projection */false).withPartFormatFromDisk().build(); + backup_projection(projection_part->getDataPartStorage(), *projection_part); + } + } } if (hold_storage_and_part_ptrs) @@ -5484,7 +5567,7 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const auto backup = restorer.getBackup(); Strings part_names = backup->listFiles(data_path_in_backup, /*recursive*/ false); - boost::remove_erase(part_names, "mutations"); + std::erase(part_names, "mutations"); bool restore_broken_parts_as_detached = restorer.getRestoreSettings().restore_broken_parts_as_detached; @@ -6051,6 +6134,21 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::hasProjection() const +{ + auto lock = lockParts(); + for (const auto & part : data_parts_by_info) + { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (part->hasProjection()) + return true; + } + return false; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; @@ -7120,28 +7218,30 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData( return checkStructureAndGetMergeTreeData(*source_table, src_snapshot, my_snapshot); } -std::pair MergeTreeData::cloneAndLoadDataPartOnSameDisk( +/// must_on_same_disk=false is used only when attaching a partition; it covers both the same-disk and the different-disk case. +std::pair MergeTreeData::cloneAndLoadDataPart( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot, const IDataPartStorage::ClonePartParams & params, const ReadSettings & read_settings, - const WriteSettings & write_settings) + const WriteSettings & write_settings, + bool must_on_same_disk) { chassert(!isStaticStorage()); /// Check that the storage policy contains the disk where the src_part is located. - bool does_storage_policy_allow_same_disk = false; + bool on_same_disk = false; for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { if (disk->getName() == src_part->getDataPartStorage().getDiskName()) { - does_storage_policy_allow_same_disk = true; + on_same_disk = true; break; } } - if (!does_storage_policy_allow_same_disk) + if (!on_same_disk && must_on_same_disk) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Could not clone and load part {} because disk does not belong to storage policy", @@ -7151,7 +7251,6 @@ std::pair MergeTreeData::cloneAn String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); - /// Why it is needed if we only hardlink files?
auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); auto src_part_storage = src_part->getDataPartStoragePtr(); @@ -7162,13 +7261,32 @@ std::pair MergeTreeData::cloneAn if (params.copy_instead_of_hardlink) with_copy = " (copying data)"; - auto dst_part_storage = src_part_storage->freeze( - relative_data_path, - tmp_dst_part_name, - read_settings, - write_settings, - /* save_metadata_callback= */ {}, - params); + std::shared_ptr dst_part_storage{}; + if (on_same_disk) + { + dst_part_storage = src_part_storage->freeze( + relative_data_path, + tmp_dst_part_name, + read_settings, + write_settings, + /* save_metadata_callback= */ {}, + params); + } + else + { + auto reservation_on_dst = getStoragePolicy()->reserve(src_part->getBytesOnDisk()); + if (!reservation_on_dst) + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on disk."); + dst_part_storage = src_part_storage->freezeRemote( + relative_data_path, + tmp_dst_part_name, + /* dst_disk = */reservation_on_dst->getDisk(), + read_settings, + write_settings, + /* save_metadata_callback= */ {}, + params + ); + } if (params.metadata_version_to_write.has_value()) { @@ -7300,10 +7418,19 @@ void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr data_part) const broken_part_callback(part->name); } } - else if (data_part->getState() == MergeTreeDataPartState::Active) - broken_part_callback(data_part->name); else - LOG_DEBUG(log, "Will not check potentially broken part {} because it's not active", data_part->getNameWithState()); + { + MergeTreeDataPartState state = MergeTreeDataPartState::Temporary; + { + auto lock = lockParts(); + state = data_part->getState(); + } + + if (state == MergeTreeDataPartState::Active) + broken_part_callback(data_part->name); + else + LOG_DEBUG(log, "Will not check potentially broken part {} because it's not active", data_part->getNameWithState()); + } } MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & partition_ast, ContextPtr local_context) const @@ -8384,7 +8511,7 @@ std::pair MergeTreeData::createE MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), Statistics{}, - compression_codec, txn); + compression_codec, txn ? 
txn->tid : Tx::PrehistoricTID); bool sync_on_insert = settings->fsync_after_insert; @@ -8447,6 +8574,16 @@ void MergeTreeData::unloadPrimaryKeys() } } +void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) +{ + /// Aggregate functions are already forbidden, but SimpleAggregateFunction is not + for (const auto & data_type : sorting_key.data_types) + { + if (dynamic_cast(data_type->getCustomName())) + throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); + } +} + bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove) { for (const auto & command : commands) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4436dadd561..440daaf6ced 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -426,7 +426,7 @@ public: bool supportsPrewhere() const override { return true; } - ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const override; + ConditionEstimator getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const override; bool supportsFinal() const override; @@ -434,10 +434,13 @@ public: bool supportsTTL() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool supportsLightweightDelete() const override; + bool hasProjection() const override; + bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; @@ -652,10 +655,9 @@ public: /// Renames the part to detached/_ and removes it from data_parts, //// so it will not be deleted in clearOldParts. - /// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part. /// NOTE: This method is safe to use only for parts which nobody else holds (like on server start or for parts which was not committed). /// For active parts it's unsafe because this method modifies fields of part (rename) while some other thread can try to read it. - void forcefullyMovePartToDetachedAndRemoveFromMemory(const DataPartPtr & part, const String & prefix = "", bool restore_covered = false); + void forcefullyMovePartToDetachedAndRemoveFromMemory(const DataPartPtr & part, const String & prefix = ""); /// This method should not be here, but async loading of Outdated parts is implemented in MergeTreeData virtual void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & /*part_name*/) {} @@ -737,6 +739,8 @@ public: const ASTPtr & new_settings, AlterLockHolder & table_lock_holder); + static void verifySortingKey(const KeyDescription & sorting_key); + /// Should be called if part data is suspected to be corrupted. /// Has the ability to check all other parts /// which reside on the same disk of the suspicious part.
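verifySortingKey() above rejects key expressions that contain SimpleAggregateFunction columns by inspecting each key data type's custom name. A minimal sketch of the same idea follows, with plain type-name strings standing in for ClickHouse's IDataType/custom-name machinery; the real check casts the custom-name object rather than comparing strings.

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

/// Sketch only: reject SimpleAggregateFunction types in a (sorting/primary) key.
void verifySortingKeyTypes(const std::vector<std::string> & key_column_types)
{
    for (const auto & type_name : key_column_types)
        if (type_name.starts_with("SimpleAggregateFunction"))
            throw std::invalid_argument("Column with type " + type_name + " is not allowed in key expression");
}

int main()
{
    verifySortingKeyTypes({"UInt64", "String"});    /// accepted
    try
    {
        verifySortingKeyTypes({"SimpleAggregateFunction(sum, UInt64)"});    /// rejected
    }
    catch (const std::invalid_argument & e)
    {
        std::cout << e.what() << '\n';
    }
}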
@@ -842,14 +846,15 @@ public: MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; - std::pair cloneAndLoadDataPartOnSameDisk( + std::pair cloneAndLoadDataPart( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot, const IDataPartStorage::ClonePartParams & params, const ReadSettings & read_settings, - const WriteSettings & write_settings); + const WriteSettings & write_settings, + bool must_on_same_disk); virtual std::vector getMutationsStatus() const = 0; @@ -1072,6 +1077,7 @@ public: scope_guard getTemporaryPartDirectoryHolder(const String & part_dir_name) const; void waitForOutdatedPartsToBeLoaded() const; + void waitForUnexpectedPartsToBeLoaded() const; bool canUsePolymorphicParts() const; /// TODO: make enabled by default in the next release if no problems found. @@ -1551,13 +1557,33 @@ protected: PartLoadingTreeNodes outdated_unloaded_data_parts TSA_GUARDED_BY(outdated_data_parts_mutex); bool outdated_data_parts_loading_canceled TSA_GUARDED_BY(outdated_data_parts_mutex) = false; + mutable std::mutex unexpected_data_parts_mutex; + mutable std::condition_variable unexpected_data_parts_cv; + + struct UnexpectedPartLoadState + { + PartLoadingTree::NodePtr loading_info; + /// if it is covered by any unexpected part + bool uncovered = true; + bool is_broken = false; + MutableDataPartPtr part; + }; + + BackgroundSchedulePool::TaskHolder unexpected_data_parts_loading_task; + std::vector unexpected_data_parts; + bool unexpected_data_parts_loading_canceled TSA_GUARDED_BY(unexpected_data_parts_mutex) = false; + + void loadUnexpectedDataParts(); + void loadUnexpectedDataPart(UnexpectedPartLoadState & state); + /// This has to be "true" by default, because in case of empty table or absence of Outdated parts /// it is automatically finished. 
std::atomic_bool outdated_data_parts_loading_finished = true; + std::atomic_bool unexpected_data_parts_loading_finished = true; void loadOutdatedDataParts(bool is_async); - void startOutdatedDataPartsLoadingTask(); - void stopOutdatedDataPartsLoadingTask(); + void startOutdatedAndUnexpectedDataPartsLoadingTask(); + void stopOutdatedAndUnexpectedDataPartsLoadingTask(); static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type); static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index aad34bfb914..d2852a3a504 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -20,14 +20,14 @@ namespace DB class MergeProgressCallback; -enum class SelectPartsDecision +enum class SelectPartsDecision : uint8_t { SELECTED = 0, CANNOT_SELECT = 1, NOTHING_TO_MERGE = 2, }; -enum class ExecuteTTLType +enum class ExecuteTTLType : uint8_t { NONE = 0, NORMAL = 1, diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 7c9e4a371ab..f33f4293023 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -61,7 +61,7 @@ void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, const String & name) const { - /// Skip inverted index files, these have a default MergeTreeDataPartChecksum with file_size == 0 + /// Skip full-text index files, these have a default MergeTreeDataPartChecksum with file_size == 0 if (isGinFile(name)) return; @@ -88,7 +88,7 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r for (const auto & [name, checksum] : files) { - /// Exclude files written by inverted index from check. No correct checksums are available for them currently. + /// Exclude files written by full-text index from check. No correct checksums are available for them currently. 
if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid")) continue; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 418b2d8f81b..4a160e5e229 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -47,26 +47,36 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( avg_value_size_hints, profile_callback, CLOCK_MONOTONIC_COARSE); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { NamesAndTypesList ordered_columns_list; std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), - [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); + [&column_positions](const auto & column) { return column_positions.contains(column.name); }); /// Order of writing is important in compact format - ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) - { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); + ordered_columns_list.sort([&column_positions](const auto & lhs, const auto & rhs) + { return column_positions.at(lhs.name) < column_positions.at(rhs.name); }); return std::make_unique( - shared_from_this(), ordered_columns_list, metadata_snapshot, - indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, ordered_columns_list, metadata_snapshot, virtual_columns, + indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 3a4e7b95f33..1fb84424774 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,15 +40,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; - bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartState.h 
b/src/Storages/MergeTree/MergeTreeDataPartState.h index 5c4779f016e..1845b7976cc 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartState.h +++ b/src/Storages/MergeTree/MergeTreeDataPartState.h @@ -15,7 +15,7 @@ namespace DB * Deleting -> Outdated: if an ZooKeeper error occurred during the deletion, we will retry deletion * Active -> DeleteOnDestroy: if part was moved to another disk */ -enum class MergeTreeDataPartState +enum class MergeTreeDataPartState : uint8_t { Temporary, /// the part is generating now, it is not in data_parts list PreActive, /// the part is in data_parts, but not used for SELECTs diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index 808b2b25fcd..39eb2c4fc80 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include @@ -272,6 +274,29 @@ bool MergeTreeDataPartTTLInfos::hasAnyNonFinishedTTLs() const return false; } +namespace +{ + /// We had backward incompatibility in representation of serialized expressions, example: + /// + /// `expired + toIntervalSecond(20)` vs `plus(expired, toIntervalSecond(20))` + /// Since they are stored as strings we cannot compare them directly as strings + /// To avoid backward incompatibility we parse them and check AST hashes. + /// This is O(N^2), but the amount of TTLs should be small, so it should be Ok. + auto tryToFindTTLExpressionInMapByASTMatching(const TTLInfoMap & ttl_info_map, const std::string & result_column) + { + ParserExpression parser; + auto ast_needle = parseQuery(parser, result_column.data(), result_column.data() + result_column.size(), "", 0, 0, 0); + for (auto it = ttl_info_map.begin(); it != ttl_info_map.end(); ++it) + { + const std::string & stored_expression = it->first; + auto ast_candidate = parseQuery(parser, stored_expression.data(), stored_expression.data() + stored_expression.size(), "", 0, 0, 0); + if (ast_candidate->getTreeHash(false) == ast_needle->getTreeHash(false)) + return it; + } + return ttl_info_map.end(); + } +} + std::optional selectTTLDescriptionForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max) { time_t best_ttl_time = 0; @@ -281,7 +306,11 @@ std::optional selectTTLDescriptionForTTLInfos(const TTLDescripti auto ttl_info_it = ttl_info_map.find(ttl_entry_it->result_column); if (ttl_info_it == ttl_info_map.end()) - continue; + { + ttl_info_it = tryToFindTTLExpressionInMapByASTMatching(ttl_info_map, ttl_entry_it->result_column); + if (ttl_info_it == ttl_info_map.end()) + continue; + } time_t ttl_time; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index fc3108e522a..149f86cef00 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -53,19 +53,28 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( profile_callback); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const
StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { return std::make_unique( - shared_from_this(), columns_list, - metadata_snapshot, indices_to_recalc, stats_to_recalc_, - getMarksFileExtension(), + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, + metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 84eeec4211b..7465e08b7c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,15 +35,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; - bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1605e5cdb9a..fb0f0ba9154 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -10,32 +10,41 @@ namespace ErrorCodes } MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, + : MergeTreeDataPartWriterOnDisk( + data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, virtual_columns_, indices_to_recalc_, stats_to_recalc, marks_file_extension_, default_codec_, settings_, index_granularity_) - , plain_file(data_part_->getDataPartStorage().writeFile( + , plain_file(getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, settings_.query_write_settings)) , plain_hashing(*plain_file) { - marks_file = data_part_->getDataPartStorage().writeFile( + marks_file = 
getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, 4096, settings_.query_write_settings); marks_file_hashing = std::make_unique(*marks_file); - if (data_part_->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) { marks_compressor = std::make_unique( *marks_file_hashing, @@ -45,20 +54,35 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, compression); + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, nullptr, compression); } } -void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc) +void MergeTreeDataPartWriterCompact::initDynamicStreamsIfNeeded(const Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block.getByName(column.name).column, compression); + } + } +} + +void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc) { ISerialization::StreamCallback callback = [&](const auto & substream_path) { assert(!substream_path.empty()); - String stream_name = ISerialization::getFileNameForStream(column, substream_path); + String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); /// Shared offsets for Nested type. if (compressed_streams.contains(stream_name)) @@ -81,7 +105,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, compressed_streams.emplace(stream_name, stream); }; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column); } namespace @@ -138,6 +162,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state); @@ -148,6 +173,9 @@ void writeColumnSingleGranule( void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::Permutation * permutation) { + /// On first block of data initialize streams for dynamic subcolumns. 
+ initDynamicStreamsIfNeeded(block); + /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical merge) @@ -230,7 +258,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeBinaryLittleEndian(static_cast(0), marks_out); writeColumnSingleGranule( - block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), + block.getByName(name_and_type->name), getSerialization(name_and_type->name), stream_getter, granule.start_row, granule.rows_to_write); /// Each type always have at least one substream @@ -241,7 +269,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G } } -void MergeTreeDataPartWriterCompact::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterCompact::fillDataChecksums(MergeTreeDataPartChecksums & checksums) { if (columns_buffer.size() != 0) { @@ -411,7 +439,7 @@ size_t MergeTreeDataPartWriterCompact::ColumnsBuffer::size() const return accumulated_columns.at(0)->size(); } -void MergeTreeDataPartWriterCompact::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & /*checksums_to_remove*/) +void MergeTreeDataPartWriterCompact::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & /*checksums_to_remove*/) { // If we don't have anything to write, skip finalization. if (!columns_list.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index ddb6178dce6..a5527b74e69 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -11,9 +11,15 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc, const String & marks_file_extension, @@ -23,12 +29,12 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) override; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) override; void finish(bool sync) override; private: /// Finish serialization of the data. Flush rows in buffer to disk, compute checksums. 
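The initDynamicStreamsIfNeeded() calls above defer stream creation for columns with dynamic subcolumns until the first block arrives, since only then is the concrete structure of such columns known. The following is a minimal sketch of that lazy-initialization pattern; the Writer, Block and Column types are simplified stand-ins, not the actual writer classes.

#include <iostream>
#include <string>
#include <vector>

struct Column { std::string name; bool has_dynamic_subcolumns = false; };
using Block = std::vector<Column>;

class Writer
{
public:
    void write(const Block & block)
    {
        initDynamicStreamsIfNeeded(block);   /// no-op after the first block
        /// ... serialize the block into the already-created streams ...
    }

private:
    void initDynamicStreamsIfNeeded(const Block & block)
    {
        if (is_dynamic_streams_initialized)
            return;
        is_dynamic_streams_initialized = true;
        for (const auto & column : block)
            if (column.has_dynamic_subcolumns)
                std::cout << "creating streams for dynamic column " << column.name << '\n';
    }

    bool is_dynamic_streams_initialized = false;
};

int main()
{
    Writer writer;
    writer.write({{"id"}, {"json", true}});   /// streams for "json" are created here
    writer.write({{"id"}, {"json", true}});   /// already initialized, nothing to do
}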
- void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums); void finishDataSerialization(bool sync); void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override; @@ -42,7 +48,9 @@ private: void addToChecksums(MergeTreeDataPartChecksums & checksums); - void addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc); + void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc); + + void initDynamicStreamsIfNeeded(const Block & block); Block header; @@ -96,6 +104,8 @@ private: /// then finally to 'marks_file'. std::unique_ptr marks_compressor; std::unique_ptr marks_source_hashing; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 441447dcaba..bcf51bfcd3d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -140,16 +140,24 @@ void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(Merg MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeIndices & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(data_part_, columns_list_, metadata_snapshot_, settings_, index_granularity_) + : IMergeTreeDataPartWriter( + data_part_name_, serializations_, data_part_storage_, index_granularity_info_, + storage_settings_, columns_list_, metadata_snapshot_, virtual_columns_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) , stats(stats_to_recalc_) , marks_file_extension(marks_file_extension_) @@ -157,17 +165,18 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( , compute_granularity(index_granularity.empty()) , compress_primary_key(settings.compress_primary_key) , execution_stats(skip_indices.size(), stats.size()) - , log(getLogger(storage.getLogName() + " (DataPartWriter)")) + , log(getLogger(logger_name_ + " (DataPartWriter)")) { if (settings.blocks_are_granules_size && !index_granularity.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't take information about index granularity from blocks, when non empty index_granularity array specified"); - if (!data_part->getDataPartStorage().exists()) - data_part->getDataPartStorage().createDirectories(); + if (!getDataPartStorage().exists()) + getDataPartStorage().createDirectories(); if (settings.rewrite_primary_key) initPrimaryIndex(); + initSkipIndices(); initStatistics(); } @@ -223,7 +232,6 @@ static size_t computeIndexGranularityImpl( size_t MergeTreeDataPartWriterOnDisk::computeIndexGranularity(const Block & block) const { - const auto storage_settings = storage.getSettings(); return computeIndexGranularityImpl( block, 
storage_settings->index_granularity_bytes, @@ -237,7 +245,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() if (metadata_snapshot->hasPrimaryKey()) { String index_name = "primary" + getIndexExtension(compress_primary_key); - index_file_stream = data_part->getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); + index_file_stream = getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); index_file_hashing_stream = std::make_unique(*index_file_stream); if (compress_primary_key) @@ -256,7 +264,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() String stats_name = stat_ptr->getFileName(); stats_streams.emplace_back(std::make_unique>( stats_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stats_name, STAT_FILE_SUFFIX, default_codec, settings.max_compress_block_size, settings.query_write_settings)); @@ -265,6 +273,9 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() void MergeTreeDataPartWriterOnDisk::initSkipIndices() { + if (skip_indices.empty()) + return; + ParserCodec codec_parser; auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); @@ -275,7 +286,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() skip_indices_streams.emplace_back( std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, skip_index->getSerializedFileExtension(), stream_name, marks_file_extension, default_codec, settings.max_compress_block_size, @@ -285,7 +296,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() GinIndexStorePtr store = nullptr; if (typeid_cast(&*skip_index) != nullptr) { - store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); + store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); @@ -468,7 +479,7 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data if (!skip_indices_aggregators[i]->empty()) skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing); - /// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown + /// Register additional files written only by the full-text index. Required because otherwise DROP TABLE complains about unknown /// files. Note that the provided actual checksums are bogus. The problem is that at this point the file writes happened already and /// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files. 
if (typeid_cast(&*skip_indices[i]) != nullptr) @@ -498,7 +509,7 @@ void MergeTreeDataPartWriterOnDisk::finishStatisticsSerialization(bool sync) } for (size_t i = 0; i < stats.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part_name); } void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums) @@ -524,7 +535,7 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync) store.second->finalize(); for (size_t i = 0; i < skip_indices.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part_name); gin_index_stores.clear(); skip_indices_streams.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 9f2cc3970fa..0c31cabc8c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -5,9 +5,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -97,16 +94,22 @@ public: void sync() const; - void addToChecksums(IMergeTreeDataPart::Checksums & checksums); + void addToChecksums(MergeTreeDataPartChecksums & checksums); }; using StreamPtr = std::unique_ptr>; using StatisticStreamPtr = std::unique_ptr>; MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, @@ -133,13 +136,13 @@ protected: void calculateAndSerializeStatistics(const Block & stats_block); /// Finishes primary index serialization: write final primary index row (if required) and compute checksums - void fillPrimaryIndexChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillPrimaryIndexChecksums(MergeTreeDataPartChecksums & checksums); void finishPrimaryIndexSerialization(bool sync); /// Finishes skip indices serialization: write all accumulated data to disk and compute checksums - void fillSkipIndicesChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillSkipIndicesChecksums(MergeTreeDataPartChecksums & checksums); void finishSkipIndicesSerialization(bool sync); - void fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillStatisticsChecksums(MergeTreeDataPartChecksums & checksums); void finishStatisticsSerialization(bool sync); /// Get global number of the current which we are writing (or going to start to write) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp 
index 6a3b08d4d65..afa14d8a98a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -76,37 +76,62 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, } MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, - indices_to_recalc_, stats_to_recalc_, marks_file_extension_, - default_codec_, settings_, index_granularity_) + : MergeTreeDataPartWriterOnDisk( + data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, virtual_columns_, + indices_to_recalc_, stats_to_recalc_, marks_file_extension_, + default_codec_, settings_, index_granularity_) { - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, compression); + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, nullptr, compression); + } +} + +void MergeTreeDataPartWriterWide::initDynamicStreamsIfNeeded(const DB::Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + block_sample = block.cloneEmpty(); + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block_sample.getByName(column.name).column, compression); + } } } void MergeTreeDataPartWriterWide::addStreams( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column, const ASTPtr & effective_codec_desc) { ISerialization::StreamCallback callback = [&](const auto & substream_path) { assert(!substream_path.empty()); - auto storage_settings = storage.getSettings(); - auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto full_stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); String stream_name; if (storage_settings->replace_long_file_name_to_hash && full_stream_name.size() > storage_settings->max_file_name_length) @@ -114,6 +139,10 @@ void MergeTreeDataPartWriterWide::addStreams( else stream_name = full_stream_name; + /// Shared offsets for Nested type. 
+ if (column_streams.contains(stream_name)) + return; + auto it = stream_name_to_full_name.find(stream_name); if (it != stream_name_to_full_name.end() && it->second != full_stream_name) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, @@ -121,10 +150,6 @@ void MergeTreeDataPartWriterWide::addStreams( " It is a collision between a filename for one column and a hash of filename for another column or a bug", stream_name, it->second, full_stream_name); - /// Shared offsets for Nested type. - if (column_streams.contains(stream_name)) - return; - const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; @@ -138,7 +163,7 @@ void MergeTreeDataPartWriterWide::addStreams( auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); - const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage()); UInt64 max_compress_block_size = 0; if (column_desc) @@ -149,7 +174,7 @@ void MergeTreeDataPartWriterWide::addStreams( column_streams[stream_name] = std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, DATA_FILE_EXTENSION, stream_name, marks_file_extension, compression_codec, @@ -163,7 +188,7 @@ void MergeTreeDataPartWriterWide::addStreams( }; ISerialization::SubstreamPath path; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column); } const String & MergeTreeDataPartWriterWide::getStreamName( @@ -222,6 +247,9 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Permutation * permutation) { + /// On first block of data initialize streams for dynamic subcolumns. + initDynamicStreamsIfNeeded(block); + /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical part of vertical merge) @@ -264,7 +292,7 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm { auto & column = block_to_write.getByName(it->name); - if (data_part->getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) + if (getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) column.column = recursiveRemoveSparse(column.column); if (permutation) @@ -302,11 +330,12 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm } void MergeTreeDataPartWriterWide::writeSingleMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns, size_t number_of_rows) { - StreamsWithMarks marks = getCurrentMarksForColumn(column, offset_columns); + auto * sample_column = block_sample.findByName(name_and_type.name); + StreamsWithMarks marks = getCurrentMarksForColumn(name_and_type, sample_column ? 
sample_column->column : nullptr, offset_columns); for (const auto & mark : marks) flushMarkToFile(mark, number_of_rows); } @@ -323,21 +352,22 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre } StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column_sample, WrittenOffsetColumns & offset_columns) { StreamsWithMarks result; - const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage()); UInt64 min_compress_block_size = 0; if (column_desc) if (const auto * value = column_desc->settings.tryGet("min_compress_block_size")) min_compress_block_size = value->safeGet(); if (!min_compress_block_size) min_compress_block_size = settings.min_compress_block_size; - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - auto stream_name = getStreamName(column, substream_path); + auto stream_name = getStreamName(name_and_type, substream_path); /// Don't write offsets more than one time for Nested type. if (is_offsets && offset_columns.contains(stream_name)) @@ -355,7 +385,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( stream_with_mark.mark.offset_in_decompressed_block = stream.compressed_hashing.offset(); result.push_back(stream_with_mark); - }); + }, name_and_type.type, column_sample); return result; } @@ -368,7 +398,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( ISerialization::SerializeBinaryBulkSettings & serialize_settings, const Granule & granule) { - const auto & serialization = data_part->getSerialization(name_and_type.name); + const auto & serialization = getSerialization(name_and_type.name); serialization->serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. @@ -382,7 +412,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( return; column_streams.at(stream_name)->compressed_hashing.nextIfAtEnd(); - }); + }, name_and_type.type, column.getPtr()); } /// Column must not be empty. 
(column.size() !== 0) @@ -398,7 +428,7 @@ void MergeTreeDataPartWriterWide::writeColumn( const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); - auto serialization = data_part->getSerialization(name_and_type.name); + auto serialization = getSerialization(name_and_type.name); if (inserted) { @@ -407,11 +437,10 @@ void MergeTreeDataPartWriterWide::writeColumn( serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } - const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; for (const auto & granule : granules) { @@ -424,7 +453,7 @@ void MergeTreeDataPartWriterWide::writeColumn( "We have to add new mark for column, but already have non written mark. " "Current mark {}, total marks {}, offset {}", getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); - last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, offset_columns); + last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, column.getPtr(), offset_columns); } writeSingleGranule( @@ -453,14 +482,14 @@ void MergeTreeDataPartWriterWide::writeColumn( bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) offset_columns.insert(getStreamName(name_and_type, substream_path)); - }); + }, name_and_type.type, column.getPtr()); } void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePair & name_type) { const auto & [name, type] = name_type; - const auto & serialization = data_part->getSerialization(name_type.name); + const auto & serialization = getSerialization(name_type.name); if (!type->isValueRepresentedByNumber() || type->haveSubtypes() || serialization->getKind() != ISerialization::Kind::DEFAULT) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot validate column of non fixed type {}", type->getName()); @@ -470,21 +499,21 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai String bin_path = escaped_name + DATA_FILE_EXTENSION; /// Some columns may be removed because of ttl. Skip them. 
- if (!data_part->getDataPartStorage().exists(mrk_path)) + if (!getDataPartStorage().exists(mrk_path)) return; - auto mrk_file_in = data_part->getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); + auto mrk_file_in = getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); std::unique_ptr mrk_in; - if (data_part->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) mrk_in = std::make_unique(std::move(mrk_file_in)); else mrk_in = std::move(mrk_file_in); - DB::CompressedReadBufferFromFile bin_in(data_part->getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); + DB::CompressedReadBufferFromFile bin_in(getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; - UInt64 index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + UInt64 index_granularity_rows = index_granularity_info.fixed_index_granularity; size_t mark_num; @@ -500,7 +529,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (settings.can_use_adaptive_granularity) readBinaryLittleEndian(index_granularity_rows, *mrk_in); else - index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + index_granularity_rows = index_granularity_info.fixed_index_granularity; if (must_be_last) { @@ -533,7 +562,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", - data_part->getDataPartStorage().getFullPath(), + getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); @@ -591,15 +620,13 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai " index granularity size {}, last rows {}", column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); } - } -void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { - const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -622,7 +649,8 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum if (!serialization_states.empty()) { serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? 
*written_offset_columns : offset_columns); - data_part->getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); + serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX; + getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); } if (write_final_mark) @@ -665,7 +693,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) { if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes() - && data_part->getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) + && getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) { validateColumnOfFixedSize(column); } @@ -674,7 +702,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) } -void MergeTreeDataPartWriterWide::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { // If we don't have anything to write, skip finalization. if (!columns_list.empty()) @@ -703,17 +731,17 @@ void MergeTreeDataPartWriterWide::finish(bool sync) } void MergeTreeDataPartWriterWide::writeFinalMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns) { - writeSingleMark(column, offset_columns, 0); + writeSingleMark(name_and_type, offset_columns, 0); /// Memoize information about offsets - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) - offset_columns.insert(getStreamName(column, substream_path)); - }); + offset_columns.insert(getStreamName(name_and_type, substream_path)); + }, name_and_type.type, block_sample.getByName(name_and_type.name).column); } static void fillIndexGranularityImpl( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index f5ff323563d..9d18ac76880 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -21,9 +21,15 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, @@ -33,14 +39,14 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) final; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) final; void finish(bool sync) 
final; private: /// Finish serialization of data: write final mark if required and compute checksums /// Also validate written data in debug mode - void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove); void finishDataSerialization(bool sync); /// Write data of one column. @@ -63,7 +69,8 @@ private: /// Take offsets from column and return as MarkInCompressed file with stream name StreamsWithMarks getCurrentMarksForColumn( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column_sample, WrittenOffsetColumns & offset_columns); /// Write mark to disk using stream and rows count @@ -73,18 +80,21 @@ private: /// Write mark for column taking offsets from column stream void writeSingleMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns, size_t number_of_rows); void writeFinalMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns); void addStreams( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column, const ASTPtr & effective_codec_desc); + void initDynamicStreamsIfNeeded(const Block & block); + /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical /// errors. @@ -129,6 +139,10 @@ private: /// How many rows we have already written in the current mark. /// More than zero when incoming blocks are smaller then their granularity. size_t rows_written_in_last_mark = 0; + + Block block_sample; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index de769c59d33..11058c542a6 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -53,6 +53,8 @@ namespace CurrentMetrics extern const Metric MergeTreeDataSelectExecutorThreads; extern const Metric MergeTreeDataSelectExecutorThreadsActive; extern const Metric MergeTreeDataSelectExecutorThreadsScheduled; + extern const Metric FilteringMarksWithPrimaryKey; + extern const Metric FilteringMarksWithSecondaryKeys; } namespace DB @@ -90,16 +92,10 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( for (const auto & part : parts) { - MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, {}, settings, log); - - /** In order to get a lower bound on the number of rows that match the condition on PK, - * consider only guaranteed full marks. - * That is, do not take into account the first and last marks, which may be incomplete. 
- */ - for (const auto & range : ranges) - if (range.end - range.begin > 2) - rows_count += part->index_granularity.getRowsCountInRange({range.begin + 1, range.end - 1}); - + MarkRanges exact_ranges; + markRangesFromPKRange(part, metadata_snapshot, key_condition, {}, &exact_ranges, settings, log); + for (const auto & range : exact_ranges) + rows_count += part->index_granularity.getRowsCountInRange(range); } return rows_count; @@ -594,7 +590,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd LoggerPtr log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, - bool use_skip_indexes) + bool use_skip_indexes, + bool find_exact_ranges) { chassert(alter_conversions.empty() || parts.size() == alter_conversions.size()); @@ -664,15 +661,29 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd size_t total_marks_count = part->index_granularity.getMarksCountWithoutFinal(); if (metadata_snapshot->hasPrimaryKey() || part_offset_condition) - ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, part_offset_condition, settings, log); + { + CurrentMetrics::Increment metric(CurrentMetrics::FilteringMarksWithPrimaryKey); + ranges.ranges = markRangesFromPKRange( + part, + metadata_snapshot, + key_condition, + part_offset_condition, + find_exact_ranges ? &ranges.exact_ranges : nullptr, + settings, + log); + } else if (total_marks_count) + { ranges.ranges = MarkRanges{{MarkRange{0, total_marks_count}}}; + } sum_marks_pk.fetch_add(ranges.getMarksCount(), std::memory_order_relaxed); if (!ranges.ranges.empty()) sum_parts_pk.fetch_add(1, std::memory_order_relaxed); + CurrentMetrics::Increment metric(CurrentMetrics::FilteringMarksWithSecondaryKeys); + for (size_t idx = 0; idx < skip_indexes.useful_indices.size(); ++idx) { if (ranges.ranges.empty()) @@ -733,6 +744,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd num_threads = std::min(num_streams, settings.max_threads_for_indexes); } + LOG_TRACE(log, "Filtering marks by primary and secondary keys"); + if (num_threads <= 1) { for (size_t part_index = 0; part_index < parts.size(); ++part_index) @@ -740,7 +753,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd } else { - /// Parallel loading of data parts. + /// Parallel loading and filtering of data parts. ThreadPool pool( CurrentMetrics::MergeTreeDataSelectExecutorThreads, CurrentMetrics::MergeTreeDataSelectExecutorThreadsActive, @@ -748,8 +761,11 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd num_threads); for (size_t part_index = 0; part_index < parts.size(); ++part_index) + { pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()] { + setThreadName("MergeTreeIndex"); + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachFromGroupIfNotDetached(); @@ -759,6 +775,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd process_part(part_index); }); + } pool.wait(); } @@ -887,7 +904,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar /// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions. 
return ReadFromMergeTree::selectRangesToRead( std::move(parts), - /*alter_conversions=*/ {}, + /*alter_conversions=*/{}, metadata_snapshot, query_info, context, @@ -896,7 +913,8 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar data, column_names_to_return, log, - indexes); + indexes, + /*find_exact_ranges*/false); } QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( @@ -987,11 +1005,13 @@ size_t MergeTreeDataSelectExecutor::minMarksForConcurrentRead( /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. +/// If @exact_ranges is not null, fill it with ranges containing marks of fully matched records. MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( const MergeTreeData::DataPartPtr & part, const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const std::optional & part_offset_condition, + MarkRanges * exact_ranges, const Settings & settings, LoggerPtr log) { @@ -1004,8 +1024,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( bool has_final_mark = part->index_granularity.hasFinalMark(); + bool key_condition_useful = !key_condition.alwaysUnknownOrTrue(); + bool part_offset_condition_useful = part_offset_condition && !part_offset_condition->alwaysUnknownOrTrue(); + /// If index is not used. - if (key_condition.alwaysUnknownOrTrue() && (!part_offset_condition || part_offset_condition->alwaysUnknownOrTrue())) + if (!key_condition_useful && !part_offset_condition_useful) { if (has_final_mark) res.push_back(MarkRange(0, marks_count - 1)); @@ -1015,6 +1038,10 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( return res; } + /// If conditions are relaxed, don't fill exact ranges. + if (key_condition.isRelaxed() || (part_offset_condition && part_offset_condition->isRelaxed())) + exact_ranges = nullptr; + const auto & primary_key = metadata_snapshot->getPrimaryKey(); auto index_columns = std::make_shared(); const auto & key_indices = key_condition.getKeyIndices(); @@ -1064,12 +1091,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( std::vector part_offset_left(2); std::vector part_offset_right(2); - auto may_be_true_in_range = [&](MarkRange & range) + auto check_in_range = [&](const MarkRange & range, BoolMask initial_mask = {}) { - bool key_condition_maybe_true = true; - if (!key_condition.alwaysUnknownOrTrue()) + auto check_key_condition = [&]() { - if (range.end == marks_count && !has_final_mark) + if (range.end == marks_count) { for (size_t i = 0; i < used_key_size; ++i) { @@ -1083,9 +1109,6 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } else { - if (has_final_mark && range.end == marks_count) - range.end -= 1; /// Remove final empty mark. It's useful only for primary key condition. 
- for (size_t i = 0; i < used_key_size; ++i) { if ((*index_columns)[i].column) @@ -1101,19 +1124,17 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } } } - key_condition_maybe_true = key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types); - } + return key_condition.checkInRange(used_key_size, index_left.data(), index_right.data(), key_types, initial_mask); + }; - bool part_offset_condition_maybe_true = true; - - if (part_offset_condition && !part_offset_condition->alwaysUnknownOrTrue()) + auto check_part_offset_condition = [&]() { auto begin = part->index_granularity.getMarkStartingRow(range.begin); auto end = part->index_granularity.getMarkStartingRow(range.end) - 1; if (begin > end) { /// Empty mark (final mark) - part_offset_condition_maybe_true = false; + return BoolMask(false, true); } else { @@ -1122,16 +1143,23 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( part_offset_left[1] = part->name; part_offset_right[1] = part->name; - part_offset_condition_maybe_true - = part_offset_condition->mayBeTrueInRange(2, part_offset_left.data(), part_offset_right.data(), part_offset_types); + return part_offset_condition->checkInRange( + 2, part_offset_left.data(), part_offset_right.data(), part_offset_types, initial_mask); } - } - return key_condition_maybe_true && part_offset_condition_maybe_true; + }; + + if (key_condition_useful && part_offset_condition_useful) + return check_key_condition() & check_part_offset_condition(); + else if (key_condition_useful) + return check_key_condition(); + else if (part_offset_condition_useful) + return check_part_offset_condition(); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Condition is useless but check_in_range still gets called. It is a bug"); }; - bool key_condition_exact_range = key_condition.alwaysUnknownOrTrue() || key_condition.matchesExactContinuousRange(); - bool part_offset_condition_exact_range - = !part_offset_condition || part_offset_condition->alwaysUnknownOrTrue() || part_offset_condition->matchesExactContinuousRange(); + bool key_condition_exact_range = !key_condition_useful || key_condition.matchesExactContinuousRange(); + bool part_offset_condition_exact_range = !part_offset_condition_useful || part_offset_condition->matchesExactContinuousRange(); const String & part_name = part->isProjectionPart() ? fmt::format("{}.{}", part->name, part->getParentPart()->name) : part->name; if (!key_condition_exact_range || !part_offset_condition_exact_range) @@ -1147,12 +1175,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( part->index_granularity_info.fixed_index_granularity, part->index_granularity_info.index_granularity_bytes); - /** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back). - * At each step, take the left segment and check if it fits. - * If fits, split it into smaller ones and put them on the stack. If not, discard it. - * If the segment is already of one mark length, add it to response and discard it. - */ - std::vector ranges_stack = { {0, marks_count} }; + /// There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back). + /// At each step, take the left segment and check if it fits. + /// If fits, split it into smaller ones and put them on the stack. If not, discard it. + /// If the segment is already of one mark length, add it to response and discard it. + std::vector ranges_stack = { {0, marks_count - (has_final_mark ? 
1 : 0)} }; size_t steps = 0; @@ -1163,7 +1190,9 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( ++steps; - if (!may_be_true_in_range(range)) + auto result + = check_in_range(range, exact_ranges && range.end == range.begin + 1 ? BoolMask() : BoolMask::consider_only_can_be_true); + if (!result.can_be_true) continue; if (range.end == range.begin + 1) @@ -1173,6 +1202,14 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( res.push_back(range); else res.back().end = range.end; + + if (exact_ranges && !result.can_be_false) + { + if (exact_ranges->empty() || range.begin - exact_ranges->back().end > min_marks_for_seek) + exact_ranges->push_back(range); + else + exact_ranges->back().end = range.end; + } } else { @@ -1187,7 +1224,12 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } } - LOG_TRACE(log, "Used generic exclusion search over index for part {} with {} steps", part_name, steps); + LOG_TRACE( + log, + "Used generic exclusion search {}over index for part {} with {} steps", + exact_ranges ? "with exact ranges " : "", + part_name, + steps); } else { @@ -1201,40 +1243,84 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( MarkRange result_range; + size_t last_mark = marks_count - (has_final_mark ? 1 : 0); size_t searched_left = 0; - size_t searched_right = marks_count; + size_t searched_right = last_mark; + bool check_left = false; + bool check_right = false; while (searched_left + 1 < searched_right) { const size_t middle = (searched_left + searched_right) / 2; MarkRange range(0, middle); - if (may_be_true_in_range(range)) + if (check_in_range(range, BoolMask::consider_only_can_be_true).can_be_true) searched_right = middle; else searched_left = middle; ++steps; + check_left = true; } result_range.begin = searched_left; LOG_TRACE(log, "Found (LEFT) boundary mark: {}", searched_left); - searched_right = marks_count; + searched_right = last_mark; while (searched_left + 1 < searched_right) { const size_t middle = (searched_left + searched_right) / 2; - MarkRange range(middle, marks_count); - if (may_be_true_in_range(range)) + MarkRange range(middle, last_mark); + if (check_in_range(range, BoolMask::consider_only_can_be_true).can_be_true) searched_left = middle; else searched_right = middle; ++steps; + check_right = true; } result_range.end = searched_right; LOG_TRACE(log, "Found (RIGHT) boundary mark: {}", searched_right); - if (result_range.begin < result_range.end && may_be_true_in_range(result_range)) - res.emplace_back(std::move(result_range)); + if (result_range.begin < result_range.end) + { + if (exact_ranges) + { + if (result_range.begin + 1 == result_range.end) + { + auto check_result = check_in_range(result_range); + if (check_result.can_be_true) + { + if (!check_result.can_be_false) + exact_ranges->emplace_back(result_range); + res.emplace_back(std::move(result_range)); + } + } + else + { + /// Candidate range with size > 1 is already can_be_true + auto result_exact_range = result_range; + if (check_in_range({result_range.begin, result_range.begin + 1}, BoolMask::consider_only_can_be_false).can_be_false) + ++result_exact_range.begin; - LOG_TRACE(log, "Found {} range in {} steps", res.empty() ? 
"empty" : "continuous", steps); + if (check_in_range({result_range.end - 1, result_range.end}, BoolMask::consider_only_can_be_false).can_be_false) + --result_exact_range.end; + + if (result_exact_range.begin < result_exact_range.end) + { + chassert(check_in_range(result_exact_range, BoolMask::consider_only_can_be_false) == BoolMask(true, false)); + exact_ranges->emplace_back(std::move(result_exact_range)); + } + + res.emplace_back(std::move(result_range)); + } + } + else + { + /// Candidate range with both ends checked is already can_be_true + if ((check_left && check_right) || check_in_range(result_range, BoolMask::consider_only_can_be_true).can_be_true) + res.emplace_back(std::move(result_range)); + } + } + + LOG_TRACE( + log, "Found {} range {}in {} steps", res.empty() ? "empty" : "continuous", exact_ranges ? "with exact range " : "", steps); } return res; @@ -1296,8 +1382,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( size_t last_index_mark = 0; PostingsCacheForStore cache_in_store; - - if (dynamic_cast(&*index_helper) != nullptr) + if (dynamic_cast(index_helper.get())) cache_in_store.store = GinIndexStoreFactory::instance().get(index_helper->getFileName(), part->getDataPartStoragePtr()); for (size_t i = 0; i < ranges.size(); ++i) @@ -1315,12 +1400,12 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( auto ann_condition = std::dynamic_pointer_cast(condition); if (ann_condition != nullptr) { - // vector of indexes of useful ranges + /// An array of indices of useful ranges. auto result = ann_condition->getUsefulRanges(granule); for (auto range : result) { - // range for corresponding index + /// The range for the corresponding index. MarkRange data_range( std::max(ranges[i].begin, index_mark * index_granularity + range), std::min(ranges[i].end, index_mark * index_granularity + range + 1)); @@ -1344,8 +1429,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( continue; MarkRange data_range( - std::max(ranges[i].begin, index_mark * index_granularity), - std::min(ranges[i].end, (index_mark + 1) * index_granularity)); + std::max(ranges[i].begin, index_mark * index_granularity), + std::min(ranges[i].end, (index_mark + 1) * index_granularity)); if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek) res.push_back(data_range); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index ecccd6d55e3..788355c1e59 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -68,6 +68,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const std::optional & part_offset_condition, + MarkRanges * exact_ranges, const Settings & settings, LoggerPtr log); @@ -201,7 +202,8 @@ public: LoggerPtr log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, - bool use_skip_indexes); + bool use_skip_indexes, + bool find_exact_ranges); /// Create expression for sampling. /// Also, calculate _sample_factor if needed. 
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index daa163d741c..04182062b12 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -360,8 +361,6 @@ Block MergeTreeDataWriter::mergeBlock( return std::make_shared( block, 1, sort_description, block_size + 1, /*block_size_bytes=*/0, merging_params.graphite_params, time(nullptr)); } - - UNREACHABLE(); }; auto merging_algorithm = get_merging_algorithm(); @@ -422,7 +421,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); for (auto & column : columns) - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) column.type = block.getByName(column.name).type; auto minmax_idx = std::make_shared(); @@ -466,7 +465,13 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( temp_part.temporary_directory_lock = data.getTemporaryPartDirectoryHolder(part_dir); - auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); + MergeTreeIndices indices; + if (context->getSettingsRef().materialize_skip_indexes_on_insert) + indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); + + Statistics statistics; + if (context->getSettingsRef().materialize_statistics_on_insert) + statistics = MergeTreeStatisticsFactory::instance().getMany(metadata_snapshot->getColumns()); /// If we need to calculate some columns to sort. if (metadata_snapshot->hasSortingKey() || metadata_snapshot->hasSecondaryIndices()) @@ -498,6 +503,12 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } + if (data.getSettings()->allow_experimental_optimized_row_order) + { + RowOrderOptimizer::optimize(block, sort_description, perm); + perm_ptr = &perm; + } + Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names; if (context->getSettingsRef().optimize_on_insert) { @@ -508,9 +519,10 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( /// Size of part would not be greater than block.bytes() + epsilon size_t expected_size = block.bytes(); - /// If optimize_on_insert is true, block may become empty after merge. - /// There is no need to create empty part. - if (expected_size == 0) + /// If optimize_on_insert is true, block may become empty after merge. There + /// is no need to create empty part. Since expected_size could be zero when + /// part only contains empty tuples. As a result, check rows instead. + if (block.rows() == 0) return temp_part; DB::IMergeTreeDataPart::TTLInfos move_ttl_infos; @@ -598,9 +610,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( metadata_snapshot, columns, indices, - MergeTreeStatisticsFactory::instance().getMany(metadata_snapshot->getColumns()), + statistics, compression_codec, - context->getCurrentTransaction(), + context->getCurrentTransaction() ? 
context->getCurrentTransaction()->tid : Tx::PrehistoricTID, false, false, context->getWriteSettings()); @@ -718,6 +730,12 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocksAlreadySorted); } + if (data.getSettings()->allow_experimental_optimized_row_order) + { + RowOrderOptimizer::optimize(block, sort_description, perm); + perm_ptr = &perm; + } + if (projection.type == ProjectionDescription::Type::Aggregate && merge_is_needed) { ProfileEventTimeIncrement watch(ProfileEvents::MergeTreeDataProjectionWriterMergingBlocksMicroseconds); @@ -738,7 +756,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( MergeTreeIndices{}, Statistics{}, /// TODO(hanfei): It should be helpful to write statistics for projection result. compression_codec, - NO_TRANSACTION_PTR, + Tx::PrehistoricTID, false, false, data.getContext()->getWriteSettings()); out->writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index feeb1808a6f..a9125b4047e 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -13,7 +13,7 @@ namespace DB class MMappedFileCache; using MMappedFileCachePtr = std::shared_ptr; -enum class CompactPartsReadMethod +enum class CompactPartsReadMethod : uint8_t { SingleBuffer, MultiBuffer, @@ -44,6 +44,8 @@ struct MergeTreeReaderSettings bool enable_multiple_prewhere_read_steps = false; /// If true, try to lower size of read buffer according to granule size and compressed block size. bool adjust_read_buffer_size = true; + /// If true, it's allowed to read the whole part without reading marks. + bool can_read_part_without_marks = false; }; struct MergeTreeWriterSettings @@ -72,6 +74,8 @@ struct MergeTreeWriterSettings , blocks_are_granules_size(blocks_are_granules_size_) , query_write_settings(query_write_settings_) , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) + , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) { } @@ -91,6 +95,9 @@ struct MergeTreeWriterSettings WriteSettings query_write_settings; size_t max_threads_for_annoy_index_creation; + + size_t low_cardinality_max_dictionary_size; + bool low_cardinality_use_single_dictionary_for_part; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index e36459b019f..e492ca0aec2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -28,7 +28,7 @@ namespace ErrorCodes template AnnoyIndexWithSerialization::AnnoyIndexWithSerialization(size_t dimensions) - : Base::AnnoyIndex(dimensions) + : Base::AnnoyIndex(static_cast(dimensions)) { } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 4f25a014382..fc5147bb56c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -490,11 +490,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeIn( if (key_node_function_name == "arrayElement") { /** Try to parse arrayElement for mapKeys index. 
- * It is important to ignore keys like column_map['Key'] IN ('') because if key does not exists in map - we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] IN ('') because if the key does not exist in the map + we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where the map key does not exist. */ if (!prepared_set) return false; @@ -781,11 +781,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (key_node_function_name == "arrayElement" && (function_name == "equals" || function_name == "notEquals")) { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map - we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in the map + we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where map key does not exist. */ if (value_field == value_type->getDefault()) return false; @@ -865,8 +865,8 @@ void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * p const auto & column_and_type = block.getByName(index_columns_name[column]); auto index_column = BloomFilterHash::hashWithColumn(column_and_type.type, column_and_type.column, *pos, max_read_rows); - const auto & index_col = checkAndGetColumn(index_column.get()); - const auto & index_data = index_col->getData(); + const auto & index_col = checkAndGetColumn(*index_column); + const auto & index_data = index_col.getData(); for (const auto & hash: index_data) column_hashes[column].insert(hash); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 826b149cf01..6f46ee0c184 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -444,11 +444,11 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( if (key_function_node_function_name == "arrayElement") { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map - we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in the map + we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where map key does not exist. 
*/ if (value_field == value_type->getDefault()) return false; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 9535cf18127..af9ee710f88 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -31,6 +31,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int ILLEGAL_INDEX; extern const int INCORRECT_QUERY; } @@ -477,11 +478,11 @@ bool MergeTreeConditionFullText::traverseASTEquals( if (function.getFunctionName() == "arrayElement") { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map - we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in the map + we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where map key does not exist. */ if (value_field == value_type->getDefault()) return false; @@ -741,6 +742,14 @@ bool MergeTreeConditionFullText::tryPrepareSetGinFilter( MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const { + /// Index type 'inverted' was renamed to 'full_text' in May 2024. + /// Tables with old indexes can be loaded during a transition period. We still want to let users know that they should drop existing + /// indexes and re-create them. Function `createIndexGranule` is called whenever the index is used by queries. Reject the query if we + /// have an old index. + /// TODO: remove this at the end of 2024. + if (index.type == INVERTED_INDEX_NAME) + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'inverted' are no longer supported. 
Please drop and recreate the index as type 'full-text'"); + return std::make_shared(index.name, index.column_names.size(), params); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index 85006c3ffde..87445c99ade 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -64,8 +64,8 @@ public: std::string describe() const; }; -constexpr inline auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } -constexpr inline auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } +constexpr auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } +constexpr auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } inline size_t getAdaptiveMrkSizeCompact(size_t columns_num); } diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 6012994b46d..e7ae1fc5c13 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -31,6 +31,8 @@ std::unique_ptr makeIndexReader( load_marks_threadpool, /*num_columns_in_mark=*/ 1); + marks_loader->startAsyncLoad(); + return std::make_unique( part->getDataPartStoragePtr(), index->getFileName(), extension, marks_count, @@ -65,6 +67,7 @@ MergeTreeIndexReader::MergeTreeIndexReader( mark_cache, uncompressed_cache, std::move(settings)); + version = index_format.version; stream->adjustRightMark(getLastMark(all_mark_ranges_)); diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index dba2bc1e56c..b11cbf1e034 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -35,8 +36,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( size_t max_rows_) : index_name(index_name_) , max_rows(max_rows_) - , index_sample_block(index_sample_block_) - , block(index_sample_block) + , block(index_sample_block_.cloneEmpty()) { } @@ -47,8 +47,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( MutableColumns && mutable_columns_) : index_name(index_name_) , max_rows(max_rows_) - , index_sample_block(index_sample_block_) - , block(index_sample_block.cloneWithColumns(std::move(mutable_columns_))) + , block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_))) { } @@ -67,10 +66,11 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const } size_serialization->serializeBinary(size(), ostr, {}); + size_t num_columns = block.columns(); - for (size_t i = 0; i < index_sample_block.columns(); ++i) + for (size_t i = 0; i < num_columns; ++i) { - const auto & type = index_sample_block.getByPosition(i).type; + const auto & type = block.getByPosition(i).type; ISerialization::SerializeBinaryBulkSettings settings; settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; }; @@ -92,8 +92,6 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd if (version != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); - block.clear(); - Field field_rows; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {}); @@ -102,24 +100,22 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd if (rows_to_read == 0) return; - for (size_t i 
= 0; i < index_sample_block.columns(); ++i) + size_t num_columns = block.columns(); + + ISerialization::DeserializeBinaryBulkSettings settings; + settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; + settings.position_independent_encoding = false; + + for (size_t i = 0; i < num_columns; ++i) { - const auto & column = index_sample_block.getByPosition(i); - const auto & type = column.type; - ColumnPtr new_column = type->createColumn(); - - - ISerialization::DeserializeBinaryBulkSettings settings; - settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; - settings.position_independent_encoding = false; + auto & elem = block.getByPosition(i); + elem.column = elem.column->cloneEmpty(); ISerialization::DeserializeBinaryBulkStatePtr state; - auto serialization = type->getDefaultSerialization(); + auto serialization = elem.type->getDefaultSerialization(); - serialization->deserializeBinaryBulkStatePrefix(settings, state); - serialization->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state, nullptr); - - block.insert(ColumnWithTypeAndName(new_column, type, column.name)); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); + serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr); } } @@ -261,8 +257,13 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( if (!filter_dag) return; - if (checkDAGUseless(*filter_dag->getOutputs().at(0), context)) + std::vector sets_to_prepare; + if (checkDAGUseless(*filter_dag->getOutputs().at(0), context, sets_to_prepare)) return; + /// Try to run subqueries, don't use index if failed (e.g. if use_index_for_in_with_subqueries is disabled). + for (auto & set : sets_to_prepare) + if (!set->buildOrderedSetInplace(context)) + return; auto filter_actions_dag = filter_dag->clone(); const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0); @@ -272,6 +273,8 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( filter_actions_dag->removeUnusedActions(); actions = std::make_shared(filter_actions_dag); + + actions_output_column_name = filter_actions_dag->getOutputs().at(0)->result_name; } bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const @@ -284,48 +287,44 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx if (isUseless()) return true; - auto granule = std::dynamic_pointer_cast(idx_granule); - if (!granule) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Set index condition got a granule with the wrong type"); + const MergeTreeIndexGranuleSet & granule = assert_cast(*idx_granule); - if (isUseless() || granule->empty() || (max_rows != 0 && granule->size() > max_rows)) + size_t size = granule.size(); + if (size == 0 || (max_rows != 0 && size > max_rows)) return true; - Block result = granule->block; + Block result = granule.block; actions->execute(result); - const auto & filter_node_name = actions->getActionsDAG().getOutputs().at(0)->result_name; - auto column = result.getByName(filter_node_name).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + const auto & column = result.getByName(actions_output_column_name).column; - if (column->onlyNull()) - return false; - - const auto * col_uint8 = typeid_cast(column.get()); - - const NullMap * null_map = nullptr; - - if (const auto * col_nullable = checkAndGetColumn(*column)) - { - col_uint8 = typeid_cast(&col_nullable->getNestedColumn()); - null_map = 
&col_nullable->getNullMapData(); - } - - if (!col_uint8) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "ColumnUInt8 expected as Set index condition result"); - - const auto & condition = col_uint8->getData(); - size_t column_size = column->size(); - - for (size_t i = 0; i < column_size; ++i) - if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1) + for (size_t i = 0; i < size; ++i) + if (!column->isNullAt(i) && (column->get64(i) & 1)) return true; return false; } +static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node & node, const ActionsDAGPtr & result_dag_or_null, ActionsDAG::NodeRawConstPtrs * storage) +{ + chassert(node.type == ActionsDAG::ActionType::FUNCTION); + if (node.function_base->getName() != "indexHint") + return node.children; + + /// indexHint arguments are stored inside of `FunctionIndexHint` class. + const auto & adaptor = typeid_cast(*node.function_base); + const auto & index_hint = typeid_cast(*adaptor.getFunction()); + if (!result_dag_or_null) + return index_hint.getActions()->getOutputs(); + + /// Import the DAG and map argument pointers. + ActionsDAGPtr actions_clone = index_hint.getActions()->clone(); + chassert(storage); + result_dag_or_null->mergeNodes(std::move(*actions_clone), storage); + return *storage; +} + const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDAG::Node & node, ActionsDAGPtr & result_dag, const ContextPtr & context, @@ -375,7 +374,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDA while (node_to_check->type == ActionsDAG::ActionType::ALIAS) node_to_check = node_to_check->children[0]; - if (node_to_check->column && isColumnConst(*node_to_check->column)) + if (node_to_check->column && (isColumnConst(*node_to_check->column) || WhichDataType(node.result_type).isSet())) return &node; RPNBuilderTreeContext tree_context(context); @@ -422,14 +421,15 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio while (node_to_check->type == ActionsDAG::ActionType::ALIAS) node_to_check = node_to_check->children[0]; - if (node_to_check->column && isColumnConst(*node_to_check->column)) + if (node_to_check->column && (isColumnConst(*node_to_check->column) || WhichDataType(node.result_type).isSet())) return nullptr; if (node_to_check->type != ActionsDAG::ActionType::FUNCTION) return nullptr; auto function_name = node_to_check->function->getName(); - const auto & arguments = node_to_check->children; + ActionsDAG::NodeRawConstPtrs temp_ptrs_to_argument; + const auto & arguments = getArguments(*node_to_check, result_dag, &temp_ptrs_to_argument); size_t arguments_size = arguments.size(); if (function_name == "not") @@ -444,7 +444,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio } else if (function_name == "and" || function_name == "indexHint" || function_name == "or") { - if (arguments_size < 2) + if (arguments_size < 1) return nullptr; ActionsDAG::NodeRawConstPtrs children; @@ -463,18 +463,12 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio const auto * last_argument = children.back(); children.pop_back(); - const auto * before_last_argument = children.back(); - children.pop_back(); - - while (true) + while (!children.empty()) { - last_argument = &result_dag->addFunction(function, {before_last_argument, last_argument}, {}); - - if (children.empty()) - break; - - before_last_argument = children.back(); + const auto * before_last_argument = children.back(); children.pop_back(); + 
+ last_argument = &result_dag->addFunction(function, {before_last_argument, last_argument}, {}); } return last_argument; @@ -483,7 +477,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio return nullptr; } -bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic) const +bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, std::vector & sets_to_prepare, bool atomic) const { const auto * node_to_check = &node; while (node_to_check->type == ActionsDAG::ActionType::ALIAS) @@ -492,8 +486,13 @@ bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, RPNBuilderTreeContext tree_context(context); RPNBuilderTreeNode tree_node(node_to_check, tree_context); - if (node.column && isColumnConst(*node.column) - && !WhichDataType(node.result_type).isSet()) + if (WhichDataType(node.result_type).isSet()) + { + if (auto set = tree_node.tryGetPreparedSet()) + sets_to_prepare.push_back(set); + return false; + } + else if (node.column && isColumnConst(*node.column)) { Field literal; node.column->get(0, literal); @@ -506,173 +505,33 @@ bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, return false; auto function_name = node.function_base->getName(); - const auto & arguments = node.children; + const auto & arguments = getArguments(node, nullptr, nullptr); if (function_name == "and" || function_name == "indexHint") - return std::all_of(arguments.begin(), arguments.end(), [&, atomic](const auto & arg) { return checkDAGUseless(*arg, context, atomic); }); + { + /// Can't use std::all_of() because we have to call checkDAGUseless() for all arguments + /// to populate sets_to_prepare. + bool all_useless = true; + for (const auto & arg : arguments) + { + bool u = checkDAGUseless(*arg, context, sets_to_prepare, atomic); + all_useless = all_useless && u; + } + return all_useless; + } else if (function_name == "or") - return std::any_of(arguments.begin(), arguments.end(), [&, atomic](const auto & arg) { return checkDAGUseless(*arg, context, atomic); }); + return std::any_of(arguments.begin(), arguments.end(), [&, atomic](const auto & arg) { return checkDAGUseless(*arg, context, sets_to_prepare, atomic); }); else if (function_name == "not") - return checkDAGUseless(*arguments.at(0), context, atomic); + return checkDAGUseless(*arguments.at(0), context, sets_to_prepare, atomic); else return std::any_of(arguments.begin(), arguments.end(), - [&](const auto & arg) { return checkDAGUseless(*arg, context, true /*atomic*/); }); + [&](const auto & arg) { return checkDAGUseless(*arg, context, sets_to_prepare, true /*atomic*/); }); } auto column_name = tree_node.getColumnName(); return !key_columns.contains(column_name); } -void MergeTreeIndexConditionSet::traverseAST(ASTPtr & node) const -{ - if (operatorFromAST(node)) - { - auto & args = node->as()->arguments->children; - - for (auto & arg : args) - traverseAST(arg); - return; - } - - if (atomFromAST(node)) - { - if (node->as() || node->as()) - /// __bitWrapperFunc* uses default implementation for Nullable types - /// Here we additionally convert Null to 0, - /// otherwise condition 'something OR NULL' will always return Null and filter everything. 
- node = makeASTFunction("__bitWrapperFunc", makeASTFunction("ifNull", node, std::make_shared(Field(0)))); - } - else - node = std::make_shared(UNKNOWN_FIELD); -} - -bool MergeTreeIndexConditionSet::atomFromAST(ASTPtr & node) const -{ - /// Function, literal or column - - if (node->as()) - return true; - - if (const auto * identifier = node->as()) - return key_columns.contains(identifier->getColumnName()); - - if (auto * func = node->as()) - { - if (key_columns.contains(func->getColumnName())) - { - /// Function is already calculated. - node = std::make_shared(func->getColumnName()); - return true; - } - - auto & args = func->arguments->children; - - for (auto & arg : args) - if (!atomFromAST(arg)) - return false; - - return true; - } - - return false; -} - -bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) -{ - /// Functions AND, OR, NOT. Replace with bit*. - auto * func = node->as(); - if (!func) - return false; - - auto & args = func->arguments->children; - - if (func->name == "not") - { - if (args.size() != 1) - return false; - - func->name = "__bitSwapLastTwo"; - } - else if (func->name == "and" || func->name == "indexHint") - { - if (args.size() < 2) - return false; - - auto last_arg = args.back(); - args.pop_back(); - - ASTPtr new_func; - if (args.size() > 1) - new_func = makeASTFunction( - "__bitBoolMaskAnd", - node, - last_arg); - else - new_func = makeASTFunction( - "__bitBoolMaskAnd", - args.back(), - last_arg); - - node = new_func; - } - else if (func->name == "or") - { - if (args.size() < 2) - return false; - - auto last_arg = args.back(); - args.pop_back(); - - ASTPtr new_func; - if (args.size() > 1) - new_func = makeASTFunction( - "__bitBoolMaskOr", - node, - last_arg); - else - new_func = makeASTFunction( - "__bitBoolMaskOr", - args.back(), - last_arg); - - node = new_func; - } - else - return false; - - return true; -} - -bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomic) const -{ - if (!node) - return true; - - if (const auto * func = node->as()) - { - if (key_columns.contains(func->getColumnName())) - return false; - - const ASTs & args = func->arguments->children; - - if (func->name == "and" || func->name == "indexHint") - return std::all_of(args.begin(), args.end(), [this, atomic](const auto & arg) { return checkASTUseless(arg, atomic); }); - else if (func->name == "or") - return std::any_of(args.begin(), args.end(), [this, atomic](const auto & arg) { return checkASTUseless(arg, atomic); }); - else if (func->name == "not") - return checkASTUseless(args[0], atomic); - else - return std::any_of(args.begin(), args.end(), - [this](const auto & arg) { return checkASTUseless(arg, true); }); - } - else if (const auto * literal = node->as()) - return !atomic && literal->value.safeGet(); - else if (const auto * identifier = node->as()) - return !key_columns.contains(identifier->getColumnName()); - else - return true; -} - MergeTreeIndexGranulePtr MergeTreeIndexSet::createIndexGranule() const { diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 7c66ba1a867..6efc2effafd 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -34,7 +34,6 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const String index_name; const size_t max_rows; - const Block index_sample_block; Block block; }; @@ -107,15 +106,7 @@ private: const ContextPtr & context, std::unordered_map & node_to_result_node) const; - bool 
checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic = false) const; - - void traverseAST(ASTPtr & node) const; - - bool atomFromAST(ASTPtr & node) const; - - static bool operatorFromAST(ASTPtr & node); - - bool checkASTUseless(const ASTPtr & node, bool atomic = false) const; + bool checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, std::vector & sets_to_prepare, bool atomic = false) const; String index_name; size_t max_rows; @@ -127,6 +118,7 @@ private: std::unordered_set key_columns; ExpressionActionsPtr actions; + String actions_output_column_name; }; diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index 8ab7d785892..bded961db8e 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -137,9 +137,16 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerValidator("usearch", usearchIndexValidator); #endif + registerCreator("inverted", fullTextIndexCreator); + registerValidator("inverted", fullTextIndexValidator); + + /// ------ + /// TODO: remove this block at the end of 2024. + /// Index type 'inverted' was renamed to 'full_text' in May 2024. + /// To support loading tables with old indexes during a transition period, register full-text indexes under their old name. registerCreator("full_text", fullTextIndexCreator); registerValidator("full_text", fullTextIndexValidator); - + /// ------ } MergeTreeIndexFactory & MergeTreeIndexFactory::instance() diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index eae7594448a..168134a329f 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -64,6 +64,10 @@ MergeTreeMarksLoader::MergeTreeMarksLoader( , read_settings(read_settings_) , num_columns_in_mark(num_columns_in_mark_) , load_marks_threadpool(load_marks_threadpool_) +{ +} + +void MergeTreeMarksLoader::startAsyncLoad() { if (load_marks_threadpool) future = loadMarksAsync(); @@ -102,6 +106,8 @@ MergeTreeMarksGetterPtr MergeTreeMarksLoader::loadMarks() MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() { + LOG_TEST(getLogger("MergeTreeMarksLoader"), "Loading marks from path {}", mrk_path); + /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. 
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; @@ -218,7 +224,9 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() } } else + { loaded_marks = loadMarksImpl(); + } if (!loaded_marks) { diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 73dd462f2fa..2aa4474e1c5 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -50,6 +50,7 @@ public: ~MergeTreeMarksLoader(); + void startAsyncLoad(); MergeTreeMarksGetterPtr loadMarks(); size_t getNumColumns() const { return num_columns_in_mark; } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index ddeaf69136a..b240f80ee13 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -94,7 +94,7 @@ namespace } void operator() (const IPv6 & x) const { - return operator()(String(reinterpret_cast(&x), 16)); + operator()(String(reinterpret_cast(&x), 16)); } void operator() (const Float64 & x) const { @@ -413,12 +413,12 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file, {}); } -std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const +std::unique_ptr MergeTreePartition::store( + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const { - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - const auto & context = storage.getContext(); - const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block; - return store(partition_key_sample, data_part_storage, checksums, context->getWriteSettings()); + const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage_context).sample_block; + return store(partition_key_sample, data_part_storage, checksums, storage_context->getWriteSettings()); } std::unique_ptr MergeTreePartition::store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 78b141f26ec..44def70bdd9 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -44,7 +44,9 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. 
- [[nodiscard]] std::unique_ptr store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; + [[nodiscard]] std::unique_ptr store( + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; void assign(const MergeTreePartition & other) { value = other.value; } diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index 43d8ebdd6d3..3cf270946d8 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -12,7 +12,7 @@ namespace DB { -enum class MovePartsOutcome +enum class MovePartsOutcome : uint8_t { PartsMoved, NothingToMove, diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 6d2875b8d9f..2c249f7b63b 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -46,49 +46,30 @@ bool MergeTreePrefetchedReadPool::TaskHolder::operator<(const TaskHolder & other } -MergeTreePrefetchedReadPool::PrefetchedReaders::~PrefetchedReaders() -{ - for (auto & prefetch_future : prefetch_futures) - if (prefetch_future.valid()) - prefetch_future.wait(); -} - MergeTreePrefetchedReadPool::PrefetchedReaders::PrefetchedReaders( + ThreadPool & pool, MergeTreeReadTask::Readers readers_, Priority priority_, - MergeTreePrefetchedReadPool & pool_) + MergeTreePrefetchedReadPool & read_prefetch) : is_valid(true) , readers(std::move(readers_)) + , prefetch_runner(pool, "ReadPrepare") { - try + prefetch_runner(read_prefetch.createPrefetchedTask(readers.main.get(), priority_)); + + for (const auto & reader : readers.prewhere) + prefetch_runner(read_prefetch.createPrefetchedTask(reader.get(), priority_)); + + fiu_do_on(FailPoints::prefetched_reader_pool_failpoint, { - prefetch_futures.reserve(1 + readers.prewhere.size()); - - prefetch_futures.push_back(pool_.createPrefetchedFuture(readers.main.get(), priority_)); - - for (const auto & reader : readers.prewhere) - prefetch_futures.push_back(pool_.createPrefetchedFuture(reader.get(), priority_)); - - fiu_do_on(FailPoints::prefetched_reader_pool_failpoint, - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failpoint for prefetched reader enabled"); - }); - } - catch (...) /// in case of memory exceptions we have to wait - { - for (auto & prefetch_future : prefetch_futures) - if (prefetch_future.valid()) - prefetch_future.wait(); - - throw; - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failpoint for prefetched reader enabled"); + }); } void MergeTreePrefetchedReadPool::PrefetchedReaders::wait() { ProfileEventTimeIncrement watch(ProfileEvents::WaitPrefetchTaskMicroseconds); - for (auto & prefetch_future : prefetch_futures) - prefetch_future.wait(); + prefetch_runner.waitForAllToFinish(); } MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get() @@ -96,13 +77,7 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get() SCOPE_EXIT({ is_valid = false; }); ProfileEventTimeIncrement watch(ProfileEvents::WaitPrefetchTaskMicroseconds); - /// First wait for completion of all futures. 
- for (auto & prefetch_future : prefetch_futures) - prefetch_future.wait(); - - /// Then rethrow first exception if any. - for (auto & prefetch_future : prefetch_futures) - prefetch_future.get(); + prefetch_runner.waitForAllToFinishAndRethrowFirstError(); return std::move(readers); } @@ -139,7 +114,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( fillPerThreadTasks(pool_settings.threads, pool_settings.sum_marks); } -std::future MergeTreePrefetchedReadPool::createPrefetchedFuture(IMergeTreeReader * reader, Priority priority) +std::function MergeTreePrefetchedReadPool::createPrefetchedTask(IMergeTreeReader * reader, Priority priority) { /// In order to make a prefetch we need to wait for marks to be loaded. But we just created /// a reader (which starts loading marks in its constructor), then if we do prefetch right @@ -147,14 +122,12 @@ std::future MergeTreePrefetchedReadPool::createPrefetchedFuture(IMergeTree /// only inside this MergeTreePrefetchedReadPool, where read tasks are created and distributed, /// and we cannot block either, therefore make prefetch inside the pool and put the future /// into the thread task. When a thread calls getTask(), it will wait for it is not ready yet. - auto task = [=, context = getContext()]() mutable + return [=, context = getContext()]() mutable { /// For async read metrics in system.query_log. PrefetchIncrement watch(context->getAsyncReadCounters()); reader->prefetchBeginOfRange(priority); }; - - return scheduleFromThreadPoolUnsafe(std::move(task), prefetch_threadpool, "ReadPrepare", priority); } void MergeTreePrefetchedReadPool::createPrefetchedReadersForTask(ThreadTask & task) @@ -164,7 +137,7 @@ void MergeTreePrefetchedReadPool::createPrefetchedReadersForTask(ThreadTask & ta auto extras = getExtras(); auto readers = MergeTreeReadTask::createReaders(task.read_info, extras, task.ranges); - task.readers_future = std::make_unique(std::move(readers), task.priority, *this); + task.readers_future = std::make_unique(prefetch_threadpool, std::move(readers), task.priority, *this); } void MergeTreePrefetchedReadPool::startPrefetches() @@ -384,6 +357,15 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics() } } +namespace +{ +ALWAYS_INLINE inline String getPartNameForLogging(const DataPartPtr & part) +{ + return part->isProjectionPart() ? 
fmt::format("{}.{}", part->name, part->getParentPartName()) : part->name; +} +} + + void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_marks) { if (per_part_infos.empty()) @@ -438,7 +420,7 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ LOG_DEBUG( log, "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", - parts_ranges[i].data_part->name, + getPartNameForLogging(parts_ranges[i].data_part), part_stat.sum_marks, part_stat.approx_size_of_mark, settings.filesystem_prefetch_step_bytes, @@ -522,7 +504,9 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ throw Exception( ErrorCodes::LOGICAL_ERROR, "Requested {} marks from part {}, but part has only {} marks", - marks_to_get_from_part, per_part_infos[part_idx]->data_part->name, part_stat.sum_marks); + marks_to_get_from_part, + getPartNameForLogging(per_part_infos[part_idx]->data_part), + part_stat.sum_marks); } size_t num_marks_to_get = marks_to_get_from_part; @@ -598,7 +582,7 @@ std::string MergeTreePrefetchedReadPool::dumpTasks(const TasksPerThread & tasks) result << '\t'; result << ++no << ": "; result << "reader future: " << task->isValidReadersFuture() << ", "; - result << "part: " << task->read_info->data_part->name << ", "; + result << "part: " << getPartNameForLogging(task->read_info->data_part) << ", "; result << "ranges: " << toString(task->ranges); } } diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 0c8a6716d40..a3a57227630 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -51,18 +52,18 @@ private: class PrefetchedReaders { public: - PrefetchedReaders() = default; - PrefetchedReaders(MergeTreeReadTask::Readers readers_, Priority priority_, MergeTreePrefetchedReadPool & pool_); + PrefetchedReaders( + ThreadPool & pool, MergeTreeReadTask::Readers readers_, Priority priority_, MergeTreePrefetchedReadPool & read_prefetch); void wait(); MergeTreeReadTask::Readers get(); bool valid() const { return is_valid; } - ~PrefetchedReaders(); private: bool is_valid = false; MergeTreeReadTask::Readers readers; - std::vector> prefetch_futures; + + ThreadPoolCallbackRunnerLocal prefetch_runner; }; struct ThreadTask @@ -108,7 +109,7 @@ private: void startPrefetches(); void createPrefetchedReadersForTask(ThreadTask & task); - std::future createPrefetchedFuture(IMergeTreeReader * reader, Priority priority); + std::function createPrefetchedTask(IMergeTreeReader * reader, Priority priority); MergeTreeReadTaskPtr stealTask(size_t thread, MergeTreeReadTask * previous_task); MergeTreeReadTaskPtr createTask(ThreadTask & thread_task, MergeTreeReadTask * previous_task); diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index eb757e1d8c7..e88ded5437d 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -28,6 +28,12 @@ # pragma clang diagnostic ignored "-Wreserved-identifier" #endif +namespace ProfileEvents +{ +extern const Event RowsReadByMainReader; +extern const Event RowsReadByPrewhereReaders; +} + namespace DB { namespace ErrorCodes @@ -681,8 +687,9 @@ size_t numZerosInTail(const UInt8 * begin, const UInt8 * end) return count; } } - while (end > begin && 
*(--end) == 0) + while (end > begin && end[-1] == 0) { + --end; ++count; } return count; @@ -715,8 +722,9 @@ size_t numZerosInTail(const UInt8 * begin, const UInt8 * end) return count; } } - while (end > begin && *(--end) == 0) + while (end > begin && end[-1] == 0) { + --end; ++count; } return count; @@ -793,8 +801,9 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con } #endif - while (end > begin && *(--end) == 0) + while (end > begin && end[-1] == 0) { + --end; ++count; } return count; @@ -804,12 +813,14 @@ MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_) + bool last_reader_in_chain_, + bool main_reader_) : merge_tree_reader(merge_tree_reader_) , index_granularity(&(merge_tree_reader->data_part_info_for_read->getIndexGranularity())) , prev_reader(prev_reader_) , prewhere_info(prewhere_info_) , last_reader_in_chain(last_reader_in_chain_) + , main_reader(main_reader_) , is_initialized(true) { if (prev_reader) @@ -1007,6 +1018,10 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar filterColumns(columns, read_result.final_filter); } + /// If columns not empty, then apply on-fly alter conversions if any required + if (!prewhere_info || prewhere_info->perform_alter_conversions) + merge_tree_reader->performRequiredConversions(columns); + /// If some columns absent in part, then evaluate default values if (should_evaluate_missing_defaults) { @@ -1017,10 +1032,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar addDummyColumnWithRowCount(additional_columns, read_result.num_rows); merge_tree_reader->evaluateMissingDefaults(additional_columns, columns); } - - /// If columns not empty, then apply on-fly alter conversions if any required - if (!prewhere_info || prewhere_info->perform_alter_conversions) - merge_tree_reader->performRequiredConversions(columns); } read_result.columns.reserve(read_result.columns.size() + columns.size()); @@ -1046,14 +1057,14 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar bool should_evaluate_missing_defaults; merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, read_result.num_rows); - /// If some columns absent in part, then evaluate default values - if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults({}, columns); - /// If result not empty, then apply on-fly alter conversions if any required if (!prewhere_info || prewhere_info->perform_alter_conversions) merge_tree_reader->performRequiredConversions(columns); + /// If some columns absent in part, then evaluate default values + if (should_evaluate_missing_defaults) + merge_tree_reader->evaluateMissingDefaults({}, columns); + for (size_t i = 0; i < columns.size(); ++i) read_result.columns[i] = std::move(columns[i]); } @@ -1147,6 +1158,10 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t result.adjustLastGranule(); fillVirtualColumns(result, leading_begin_part_offset, leading_end_part_offset); + + ProfileEvents::increment(ProfileEvents::RowsReadByMainReader, main_reader * result.numReadRows()); + ProfileEvents::increment(ProfileEvents::RowsReadByPrewhereReaders, (!main_reader) * result.numReadRows()); + return result; } @@ -1255,6 +1270,9 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si throw 
Exception(ErrorCodes::LOGICAL_ERROR, "RangeReader read {} rows, but {} expected.", num_rows, result.total_rows_per_granule); + ProfileEvents::increment(ProfileEvents::RowsReadByMainReader, main_reader * num_rows); + ProfileEvents::increment(ProfileEvents::RowsReadByPrewhereReaders, (!main_reader) * num_rows); + return columns; } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index b282ada6038..7acc8cd88b4 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -101,7 +101,8 @@ public: IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_); + bool last_reader_in_chain_, + bool main_reader_); MergeTreeRangeReader() = default; @@ -326,6 +327,7 @@ private: Block result_sample_block; /// Block with columns that are returned by this step. bool last_reader_in_chain = false; + bool main_reader = false; /// Whether it is the main reader or one of the readers for prewhere steps bool is_initialized = false; LoggerPtr log = getLogger("MergeTreeRangeReader"); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index e525f7f5f65..dc1ba030f45 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -80,8 +80,7 @@ MergeTreeReadPool::MergeTreeReadPool( /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible. const auto heuristic_min_marks = std::min(total_marks / pool_settings.threads, min_bytes_per_task / avg_mark_bytes); - if (heuristic_min_marks > min_marks_for_concurrent_read) - min_marks_for_concurrent_read = heuristic_min_marks; + min_marks_for_concurrent_read = std::max(heuristic_min_marks, min_marks_for_concurrent_read); } } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 0cbb0a86b2f..0ea19370d45 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -6,6 +6,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + MergeTreeReadPoolBase::MergeTreeReadPoolBase( RangesInDataParts && parts_, VirtualFields shared_virtual_fields_, @@ -48,6 +53,19 @@ void MergeTreeReadPoolBase::fillPerPartInfos() MergeTreeReadTaskInfo read_task_info; read_task_info.data_part = part_with_ranges.data_part; + + const auto & data_part = read_task_info.data_part; + if (data_part->isProjectionPart()) + { + read_task_info.parent_part = data_part->storage.getPartIfExists( + data_part->getParentPartName(), + {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + + if (!read_task_info.parent_part) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Did not find parent part {} for projection part {}", + data_part->getParentPartName(), data_part->getDataPartStorage().getFullPath()); + } + read_task_info.part_index_in_query = part_with_ranges.part_index_in_query; read_task_info.alter_conversions = part_with_ranges.alter_conversions; @@ -113,9 +131,25 @@ MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask( ? 
std::make_unique(*read_info->shared_size_predictor) : nullptr; /// make a copy - auto get_part_name = [](const auto & task_info) -> const String & + auto get_part_name = [](const auto & task_info) -> String { - return task_info.data_part->isProjectionPart() ? task_info.data_part->getParentPart()->name : task_info.data_part->name; + const auto & data_part = task_info.data_part; + + if (data_part->isProjectionPart()) + { + auto parent_part_name = data_part->getParentPartName(); + + auto parent_part = data_part->storage.getPartIfExists( + parent_part_name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + + if (!parent_part) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Did not find parent part {} for projection part {}", + parent_part_name, data_part->getDataPartStorage().getFullPath()); + + return parent_part_name; + } + + return data_part->name; }; auto extras = getExtras(); diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index 08b30e445e2..177a325ea5a 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -83,7 +83,8 @@ MergeTreeReadTask::createRangeReaders(const Readers & task_readers, const Prewhe { last_reader = task_readers.main->getColumns().empty() && (i + 1 == prewhere_actions.steps.size()); - MergeTreeRangeReader current_reader(task_readers.prewhere[i].get(), prev_reader, prewhere_actions.steps[i].get(), last_reader); + MergeTreeRangeReader current_reader( + task_readers.prewhere[i].get(), prev_reader, prewhere_actions.steps[i].get(), last_reader, /*main_reader_=*/false); new_range_readers.prewhere.push_back(std::move(current_reader)); prev_reader = &new_range_readers.prewhere.back(); @@ -91,7 +92,7 @@ MergeTreeReadTask::createRangeReaders(const Readers & task_readers, const Prewhe if (!last_reader) { - new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true); + new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true, /*main_reader_=*/true); } else { diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index c8bb501c0e8..794e70a0fbb 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -23,7 +23,7 @@ using MergeTreeReaderPtr = std::unique_ptr; using VirtualFields = std::unordered_map; -enum class MergeTreeReadType +enum class MergeTreeReadType : uint8_t { /// By default, read will use MergeTreeReadPool and return pipe with num_streams outputs. /// If num_streams == 1, will read without pool, in order specified in parts. @@ -56,6 +56,8 @@ struct MergeTreeReadTaskInfo { /// Data part which should be read while performing this task DataPartPtr data_part; + /// Parent part of the projection part + DataPartPtr parent_part; /// For `part_index` virtual column size_t part_index_in_query; /// Alter converversionss that should be applied on-fly for part. 
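The projection handling above boils down to one pattern: resolve the parent data part by name among a fixed set of allowed states, and treat a miss as a logic error rather than a recoverable condition. A minimal standalone sketch of that pattern, assuming hypothetical Part/PartState types and getPartIfExists/resolveParentOrThrow helpers (this is not the ClickHouse API):

#include <map>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>

enum class PartState { PreActive, Active, Outdated, Deleting };

struct Part
{
    std::string name;
    PartState state;
};

/// Simplified stand-in for storage.getPartIfExists(): return the part only if it
/// is currently in one of the allowed states.
std::optional<Part> getPartIfExists(
    const std::map<std::string, Part> & parts,
    const std::string & name,
    const std::set<PartState> & allowed_states)
{
    auto it = parts.find(name);
    if (it == parts.end() || !allowed_states.contains(it->second.state))
        return std::nullopt;
    return it->second;
}

/// A projection part must always have a resolvable parent, so a failed lookup is a
/// programming error, mirroring the LOGICAL_ERROR thrown in the pool code above.
Part resolveParentOrThrow(const std::map<std::string, Part> & parts, const std::string & parent_name)
{
    auto parent = getPartIfExists(parts, parent_name, {PartState::PreActive, PartState::Active, PartState::Outdated});
    if (!parent)
        throw std::logic_error("Did not find parent part " + parent_name + " for projection part");
    return *parent;
}

Storing the resolved parent in the new MergeTreeReadTaskInfo::parent_part member presumably also keeps the parent pinned (DataPartPtr is shared ownership) for the lifetime of the task.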
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 53acfd539fb..a2b8f0ad96f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -48,6 +48,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( , profile_callback(profile_callback_) , clock_type(clock_type_) { + marks_loader->startAsyncLoad(); } void MergeTreeReaderCompact::fillColumnPositions() @@ -195,7 +196,7 @@ void MergeTreeReaderCompact::readPrefix( deserialize_settings.getter = buffer_getter_for_prefix; ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix; - serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix, nullptr); } SerializationPtr serialization; @@ -205,7 +206,8 @@ void MergeTreeReaderCompact::readPrefix( serialization = getSerializationInPart(name_and_type); deserialize_settings.getter = buffer_getter; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name]); + deserialize_settings.dynamic_read_statistics = true; + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name], nullptr); } catch (Exception & e) { @@ -231,7 +233,7 @@ bool MergeTreeReaderCompact::needSkipStream(size_t column_pos, const ISerializat /// /// Consider the following columns in nested "root": /// - root.array Array(UInt8) - exists - /// - root.nested_array Array(Array(UInt8)) - does not exists (only_offsets_level=1) + /// - root.nested_array Array(Array(UInt8)) - does not exist (only_offsets_level=1) /// /// For root.nested_array it will try to read multiple streams: /// - offsets (substream_path = {ArraySizes}) diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 40a16176c69..15ef02440cb 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int CANNOT_READ_ALL_DATA; + extern const int LOGICAL_ERROR; } MergeTreeReaderStream::MergeTreeReaderStream( @@ -41,14 +42,17 @@ MergeTreeReaderStream::MergeTreeReaderStream( { } +void MergeTreeReaderStream::loadMarks() +{ + if (!marks_getter) + marks_getter = marks_loader->loadMarks(); +} + void MergeTreeReaderStream::init() { if (initialized) return; - initialized = true; - marks_getter = marks_loader->loadMarks(); - /// Compute the size of the buffer. 
auto [max_mark_range_bytes, sum_mark_range_bytes] = estimateMarkRangeBytes(all_mark_ranges); @@ -110,11 +114,15 @@ void MergeTreeReaderStream::init() data_buffer = non_cached_buffer.get(); compressed_data_buffer = non_cached_buffer.get(); } + + initialized = true; } void MergeTreeReaderStream::seekToMarkAndColumn(size_t row_index, size_t column_position) { init(); + loadMarks(); + const auto & mark = marks_getter->getMark(row_index, column_position); try @@ -193,7 +201,7 @@ CompressedReadBufferBase * MergeTreeReaderStream::getCompressedDataBuffer() return compressed_data_buffer; } -size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) const +size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. @@ -202,7 +210,8 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons if (marks_count == 0) return 0; - assert(right_mark <= marks_count); + chassert(right_mark <= marks_count); + loadMarks(); if (right_mark == 0) return marks_getter->getMark(right_mark, 0).offset_in_compressed_file; @@ -281,9 +290,9 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons return file_size; } -std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { - assert(marks_getter != nullptr); + loadMarks(); size_t max_range_bytes = 0; size_t sum_range_bytes = 0; @@ -302,7 +311,34 @@ std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBy return {max_range_bytes, sum_range_bytes}; } -size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const +size_t MergeTreeReaderStreamSingleColumnWholePart::getRightOffset(size_t right_mark) +{ + if (right_mark != marks_count) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected one right mark: {}, got: {}", + marks_count, right_mark); + } + return file_size; +} + +std::pair MergeTreeReaderStreamSingleColumnWholePart::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +{ + if (!mark_ranges.isOneRangeForWholePart(marks_count)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected one mark range that covers the whole part, got: {}", + mark_ranges.describe()); + } + return {file_size, file_size}; +} + +void MergeTreeReaderStreamSingleColumnWholePart::seekToMark(size_t) +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeReaderStreamSingleColumnWholePart cannot seek to marks"); +} + +size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. 
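The loadMarks()/init() split above turns mark loading into a lazy step: instead of being fetched eagerly when the stream is constructed or initialized, marks_getter is populated the first time an accessor actually needs mark offsets (seekToMarkAndColumn, getRightOffset, estimateMarkRangeBytes). A small sketch of the same lazy-initialization idiom, with hypothetical types rather than the real MergeTreeMarksLoader/MarksGetter classes:

#include <cstddef>
#include <functional>
#include <optional>
#include <vector>

/// Stand-in for the marks index: offsets into the compressed file.
struct Marks
{
    std::vector<size_t> offsets;
};

class LazyMarks
{
public:
    explicit LazyMarks(std::function<Marks()> loader_) : loader(std::move(loader_)) {}

    /// Load on first use only; later calls reuse the cached result.
    const Marks & get()
    {
        if (!marks)
            marks = loader();
        return *marks;
    }

private:
    std::function<Marks()> loader;
    std::optional<Marks> marks;
};

The apparent payoff is the whole-part read path introduced in this patch: a stream that never seeks and never asks for per-mark offsets never pays for loading marks at all.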
@@ -311,7 +347,8 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ if (marks_count == 0) return 0; - assert(right_mark_non_included <= marks_count); + chassert(right_mark_non_included <= marks_count); + loadMarks(); if (right_mark_non_included == 0) return marks_getter->getMark(right_mark_non_included, column_position).offset_in_compressed_file; @@ -347,9 +384,9 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ } std::pair -MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const +MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) { - assert(marks_getter != nullptr); + loadMarks(); /// As a maximal range we return the maximal size of a whole stripe. size_t max_range_bytes = 0; @@ -386,8 +423,9 @@ MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const Mark return {max_range_bytes, sum_range_bytes}; } -MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) const +MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) { + loadMarks(); const auto & current_mark = marks_getter->getMark(row_index, column_position); if (marks_getter->getNumColumns() == 1) @@ -434,27 +472,27 @@ MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeM return marks_getter->getMark(mark_index + 1, column_position + 1); } -size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const +size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) { return getRightOffsetOneColumn(right_mark_non_included, column_position); } -std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { return estimateMarkRangeBytesOneColumn(mark_ranges, column_position); } -size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const +size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) { return getRightOffsetOneColumn(right_mark_non_included, marks_loader->getNumColumns() - 1); } -std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { size_t max_range_bytes = 0; size_t sum_range_bytes = 0; - for (size_t i = 0; i < marks_getter->getNumColumns(); ++i) + for (size_t i = 0; i < marks_loader->getNumColumns(); ++i) { auto [current_max, current_sum] = estimateMarkRangeBytesOneColumn(mark_ranges, i); diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index f3ca6953ceb..05341cd8acc 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -40,6 +40,7 @@ public: /// Seeks to exact mark in file. void seekToMarkAndColumn(size_t row_index, size_t column_position); + /// Seeks to the start of the file. 
void seekToStart(); /** @@ -53,11 +54,11 @@ public: private: /// Returns offset in file up to which it's needed to read file to read all rows up to @right_mark mark. - virtual size_t getRightOffset(size_t right_mark) const = 0; + virtual size_t getRightOffset(size_t right_mark) = 0; /// Returns estimated max amount of bytes to read among mark ranges (which is used as size for read buffer) /// and total amount of bytes to read in all mark ranges. - virtual std::pair<size_t, size_t> estimateMarkRangeBytes(const MarkRanges & mark_ranges) const = 0; + virtual std::pair<size_t, size_t> estimateMarkRangeBytes(const MarkRanges & mark_ranges) = 0; const ReadBufferFromFileBase::ProfileCallback profile_callback; const clockid_t clock_type; @@ -80,6 +81,7 @@ private: protected: void init(); + void loadMarks(); const MergeTreeReaderSettings settings; const size_t marks_count; @@ -100,11 +102,25 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair<size_t, size_t> estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair<size_t, size_t> estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; +class MergeTreeReaderStreamSingleColumnWholePart : public MergeTreeReaderStream +{ +public: + template <typename... Args> + explicit MergeTreeReaderStreamSingleColumnWholePart(Args &&... args) + : MergeTreeReaderStream{std::forward<Args>(args)...} + { + } + + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair<size_t, size_t> estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + void seekToMark(size_t row_index) override; +}; + /// Base class for reading from file that contains multiple columns. /// It is used to read from compact parts. /// See more details about data layout in MergeTreeDataPartCompact.h.
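The new MergeTreeReaderStreamSingleColumnWholePart declared above encodes a deliberately narrow contract: exactly one mark range covering the whole part, no seeking, and size estimates that are simply the file size, so marks never need to be loaded. A sketch of that contract under the assumption that isOneRangeForWholePart() means "a single range spanning [0, marks_count)"; the helper names below are illustrative, not the real MarkRanges API:

#include <cstddef>
#include <stdexcept>
#include <vector>

struct MarkRange
{
    size_t begin; /// inclusive, in marks
    size_t end;   /// exclusive, in marks
};

/// True only when a single range spans every mark of the part.
bool isOneRangeForWholePart(const std::vector<MarkRange> & ranges, size_t marks_count)
{
    return ranges.size() == 1 && ranges.front().begin == 0 && ranges.front().end == marks_count;
}

/// With the whole part requested, the right offset of the (only) range is just the file size;
/// asking for anything else is a logic error, not a normal runtime condition.
size_t rightOffsetForWholePart(size_t right_mark_non_included, size_t marks_count, size_t file_size)
{
    if (right_mark_non_included != marks_count)
        throw std::logic_error("whole-part stream expects right mark == marks_count");
    return file_size;
}

seekToMark() throwing LOGICAL_ERROR in the real class is the same idea: a caller that tries to seek on a whole-part stream has violated the contract, so failing loudly is preferable to silently reading from the wrong offset.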
@@ -118,9 +134,9 @@ public: } protected: - size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const; - std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const; - MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position) const; + size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position); + std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position); + MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position); }; /// Class for reading a single column from file that contains multiple columns @@ -135,8 +151,8 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, column_position); } private: @@ -154,8 +170,8 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 394a22835f1..b6882fdced9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -43,11 +43,16 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) + , profile_callback(profile_callback_) + , clock_type(clock_type_) + , read_without_marks( + settings.can_read_part_without_marks + && all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) { try { for (size_t i = 0; i < columns_to_read.size(); ++i) - addStreams(columns_to_read[i], serializations[i], profile_callback_, clock_type_); + addStreams(columns_to_read[i], serializations[i]); } catch (...) { @@ -100,9 +105,10 @@ void MergeTreeReaderWide::prefetchForAllColumns( try { auto & cache = caches[columns_to_read[pos].getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[columns_to_read[pos].getNameInStorage()]; prefetchForColumn( priority, columns_to_read[pos], serializations[pos], from_mark, continue_reading, - current_task_last_mark, cache); + current_task_last_mark, cache, deserialize_states_cache); } catch (Exception & e) { @@ -147,11 +153,12 @@ size_t MergeTreeReaderWide::readRows( { size_t column_size_before_reading = column->size(); auto & cache = caches[column_to_read.getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[column_to_read.getNameInStorage()]; readData( column_to_read, serializations[pos], column, from_mark, continue_reading, current_task_last_mark, - max_rows_to_read, cache, /* was_prefetched =*/ !prefetched_streams.empty()); + max_rows_to_read, cache, deserialize_states_cache, /* was_prefetched =*/ !prefetched_streams.empty()); /// For elements of Nested, column_size_before_reading may be greater than column size /// if offsets are not empty and were already read, but elements are empty. 
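Both the prefetch and read paths above look up per-column caches by the column's storage name (caches[...] and the new deserialize_states_caches[...]), so each column gets its own cache and deserialization state that is created on first use and reused on later calls. A toy sketch of that create-on-first-use pattern with hypothetical types, not the real SubstreamsCache or deserialize state:

#include <cstddef>
#include <string>
#include <unordered_map>

/// Hypothetical per-column deserialization state.
struct DeserializeState
{
    bool prefix_read = false;
    size_t rows_deserialized = 0;
};

using StatesByColumn = std::unordered_map<std::string, DeserializeState>;

/// Deserialize the stream prefix for a column only once; callers on both the
/// prefetch path and the read path share the same cached state afterwards.
DeserializeState & deserializePrefixOnce(StatesByColumn & states, const std::string & column_name)
{
    auto [it, inserted] = states.try_emplace(column_name);
    if (inserted)
        it->second.prefix_read = true; /// pretend the prefix was read from the stream here
    return it->second;
}

This mirrors how the reader deserializes each column's binary bulk prefix only once and then reuses the state; keying by getNameInStorage() presumably lets subcolumns of the same storage column share one cache.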
@@ -199,9 +206,7 @@ size_t MergeTreeReaderWide::readRows( void MergeTreeReaderWide::addStreams( const NameAndTypePair & name_and_type, - const SerializationPtr & serialization, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, - clockid_t clock_type) + const SerializationPtr & serialization) { bool has_any_stream = false; bool has_all_streams = true; @@ -225,29 +230,8 @@ void MergeTreeReaderWide::addStreams( return; } - auto context = data_part_info_for_read->getContext(); - auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? &context->getLoadMarksThreadpool() : nullptr; - - auto marks_loader = std::make_shared( - data_part_info_for_read, - mark_cache, - data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), - data_part_info_for_read->getMarksCount(), - data_part_info_for_read->getIndexGranularityInfo(), - settings.save_marks_in_cache, - settings.read_settings, - load_marks_threadpool, - /*num_columns_in_mark=*/ 1); - + addStream(substream_path, *stream_name); has_any_stream = true; - auto stream_settings = settings; - stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; - - streams.emplace(*stream_name, std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, - data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), - std::move(marks_loader), profile_callback, clock_type)); }; serialization->enumerateStreams(callback); @@ -256,11 +240,46 @@ void MergeTreeReaderWide::addStreams( partially_read_columns.insert(name_and_type.name); } -static ReadBuffer * getStream( +MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name) +{ + auto context = data_part_info_for_read->getContext(); + auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; + size_t num_marks_in_part = data_part_info_for_read->getMarksCount(); + + auto marks_loader = std::make_shared( + data_part_info_for_read, + mark_cache, + data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(stream_name), + num_marks_in_part, + data_part_info_for_read->getIndexGranularityInfo(), + settings.save_marks_in_cache, + settings.read_settings, + load_marks_threadpool, + /*num_columns_in_mark=*/ 1); + + auto stream_settings = settings; + stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; + + auto create_stream = [&]() + { + return std::make_unique( + data_part_info_for_read->getDataPartStorage(), stream_name, DATA_FILE_EXTENSION, + num_marks_in_part, all_mark_ranges, stream_settings, + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), + std::move(marks_loader), profile_callback, clock_type); + }; + + if (read_without_marks) + return streams.emplace(stream_name, create_stream.operator()()).first; + + marks_loader->startAsyncLoad(); + return streams.emplace(stream_name, create_stream.operator()()).first; +} + +ReadBuffer * MergeTreeReaderWide::getStream( bool seek_to_start, const ISerialization::SubstreamPath & substream_path, const MergeTreeDataPartChecksums & checksums, - MergeTreeReaderWide::FileStreams & streams, const NameAndTypePair & name_and_type, size_t from_mark, bool seek_to_mark, @@ -277,7 +296,13 @@ static ReadBuffer * getStream( auto it = streams.find(*stream_name); if (it == streams.end()) - return nullptr; + { + /// If we didn't create requested stream, but file with this path exists, create a stream for it. + /// It may happen during reading of columns with dynamic subcolumns, because all streams are known + /// only after deserializing of binary bulk prefix. 
+ + it = addStream(substream_path, *stream_name); + } MergeTreeReaderStream & stream = *it->second; stream.adjustRightMark(current_task_last_mark); @@ -294,17 +319,19 @@ void MergeTreeReaderWide::deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { const auto & name = name_and_type.name; if (!deserialize_binary_bulk_state_map.contains(name)) { ISerialization::DeserializeBinaryBulkSettings deserialize_settings; + deserialize_settings.dynamic_read_statistics = true; deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); + return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); }; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name], &deserialize_states_cache); } } @@ -315,45 +342,56 @@ void MergeTreeReaderWide::prefetchForColumn( size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); - - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); + auto callback = [&](const ISerialization::SubstreamPath & substream_path) { auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); if (stream_name && !prefetched_streams.contains(*stream_name)) { - bool seek_to_mark = !continue_reading; - if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) + bool seek_to_mark = !continue_reading && !read_without_marks; + if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); prefetched_streams.insert(*stream_name); } } - }); + }; + + auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializeState(deserialize_binary_bulk_state_map[name_and_type.name]); + ISerialization::EnumerateStreamsSettings settings; + serialization->enumerateStreams(settings, callback, data); } void MergeTreeReaderWide::readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, - size_t max_rows_to_read, ISerialization::SubstreamsCache & cache, bool was_prefetched) + const NameAndTypePair & name_and_type, + const SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool 
continue_reading, + size_t current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched) { double & avg_value_size_hint = avg_value_size_hints[name_and_type.name]; ISerialization::DeserializeBinaryBulkSettings deserialize_settings; deserialize_settings.avg_value_size_hint = avg_value_size_hint; - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - bool seek_to_mark = !was_prefetched && !continue_reading; + bool seek_to_mark = !was_prefetched && !continue_reading && !read_without_marks; return getStream( /* seek_to_start = */false, substream_path, - data_part_info_for_read->getChecksums(), streams, + data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache); }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index a9a5526dd65..841c2dc567d 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -45,14 +45,31 @@ private: void addStreams( const NameAndTypePair & name_and_type, - const SerializationPtr & serialization, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, - clockid_t clock_type); + const SerializationPtr & serialization); + + ReadBuffer * getStream( + bool seek_to_start, + const ISerialization::SubstreamPath & substream_path, + const MergeTreeDataPartChecksums & checksums, + const NameAndTypePair & name_and_type, + size_t from_mark, + bool seek_to_mark, + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache); + + FileStreams::iterator addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name); void readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, size_t max_rows_to_read, - ISerialization::SubstreamsCache & cache, bool was_prefetched); + const NameAndTypePair & name_and_type, + const SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool continue_reading, + size_t current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched); /// Make next readData more simple by calling 'prefetch' of all related ReadBuffers (column streams). 
void prefetchForColumn( @@ -62,17 +79,23 @@ private: size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); void deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); std::unordered_map caches; + std::unordered_map deserialize_states_caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; + ReadBufferFromFileBase::ProfileCallback profile_callback; + clockid_t clock_type; + bool read_without_marks = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index fce733d47b7..78b67de1a7e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -26,14 +26,12 @@ namespace ErrorCodes MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, const MergeTreeReaderSettings & reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) - , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 6b663e0fd36..03ca30dd5b3 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -26,12 +26,7 @@ struct ParallelReadingExtension { MergeTreeAllRangesCallback all_callback; MergeTreeReadTaskCallback callback; - size_t count_participating_replicas{0}; size_t number_of_current_replica{0}; - /// This is needed to estimate the number of bytes - /// between a pair of marks to perform one request - /// over the network for a 1Gb of data. 
- Names columns_to_read; }; /// Base class for MergeTreeThreadSelectAlgorithm and MergeTreeSelectAlgorithm @@ -41,7 +36,6 @@ public: MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, @@ -71,7 +65,6 @@ private: const MergeTreeReadPoolPtr pool; const MergeTreeSelectAlgorithmPtr algorithm; - const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index c022cfe3861..02f8d6f4f6a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -42,8 +42,7 @@ public: std::optional mark_ranges_, bool apply_deleted_mask, bool read_with_direct_io_, - bool take_column_types_from_storage, - bool quiet = false); + bool prefetch); ~MergeTreeSequentialSource() override; @@ -96,8 +95,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( std::optional mark_ranges_, bool apply_deleted_mask, bool read_with_direct_io_, - bool take_column_types_from_storage, - bool quiet) + bool prefetch) : ISource(storage_snapshot_->getSampleBlockForColumns(columns_to_read_)) , storage(storage_) , storage_snapshot(storage_snapshot_) @@ -107,16 +105,13 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( , mark_ranges(std::move(mark_ranges_)) , mark_cache(storage.getContext()->getMarkCache()) { - if (!quiet) - { - /// Print column name but don't pollute logs in case of many columns. - if (columns_to_read.size() == 1) - LOG_DEBUG(log, "Reading {} marks from part {}, total {} rows starting from the beginning of the part, column {}", - data_part->getMarksCount(), data_part->name, data_part->rows_count, columns_to_read.front()); - else - LOG_DEBUG(log, "Reading {} marks from part {}, total {} rows starting from the beginning of the part", - data_part->getMarksCount(), data_part->name, data_part->rows_count); - } + /// Print column name but don't pollute logs in case of many columns. 
+ if (columns_to_read.size() == 1) + LOG_DEBUG(log, "Reading {} marks from part {}, total {} rows starting from the beginning of the part, column {}", + data_part->getMarksCount(), data_part->name, data_part->rows_count, columns_to_read.front()); + else + LOG_DEBUG(log, "Reading {} marks from part {}, total {} rows starting from the beginning of the part", + data_part->getMarksCount(), data_part->name, data_part->rows_count); auto alter_conversions = storage.getAlterConversionsForPart(data_part); @@ -131,21 +126,12 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( storage.supportsSubcolumns(), columns_to_read); - NamesAndTypesList columns_for_reader; - if (take_column_types_from_storage) - { - auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) - .withExtendedObjects() - .withVirtuals() - .withSubcolumns(storage.supportsSubcolumns()); + auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) + .withExtendedObjects() + .withVirtuals() + .withSubcolumns(storage.supportsSubcolumns()); - columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); - } - else - { - /// take columns from data_part - columns_for_reader = data_part->getColumns().addTypes(columns_to_read); - } + auto columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); const auto & context = storage.getContext(); ReadSettings read_settings = context->getReadSettings(); @@ -174,6 +160,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( .read_settings = read_settings, .save_marks_in_cache = false, .apply_deleted_mask = apply_deleted_mask, + .can_read_part_without_marks = true, }; if (!mark_ranges) @@ -184,12 +171,15 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( storage_snapshot, *mark_ranges, /*virtual_fields=*/ {}, - /*uncompressed_cache=*/{}, + /*uncompressed_cache=*/ {}, mark_cache.get(), alter_conversions, reader_settings, - {}, - {}); + /*avg_value_size_hints=*/ {}, + /*profile_callback=*/ {}); + + if (prefetch) + reader->prefetchBeginOfRange(Priority{}); } static void fillBlockNumberColumns( @@ -230,6 +220,7 @@ try const auto & header = getPort().getHeader(); /// Part level is useful for next step for merging non-merge tree table bool add_part_level = storage.merging_params.mode != MergeTreeData::MergingParams::Ordinary; + size_t num_marks_in_part = data_part->getMarksCount(); if (!isCancelled() && current_row < data_part->rows_count) { @@ -238,7 +229,7 @@ try const auto & sample = reader->getColumns(); Columns columns(sample.size()); - size_t rows_read = reader->readRows(current_mark, data_part->getMarksCount(), continue_reading, rows_to_read, columns); + size_t rows_read = reader->readRows(current_mark, num_marks_in_part, continue_reading, rows_to_read, columns); if (rows_read) { @@ -251,11 +242,11 @@ try bool should_evaluate_missing_defaults = false; reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read); + reader->performRequiredConversions(columns); + if (should_evaluate_missing_defaults) reader->evaluateMissingDefaults({}, columns); - reader->performRequiredConversions(columns); - /// Reorder columns and fill result block. 
size_t num_columns = sample.size(); Columns res_columns; @@ -311,11 +302,10 @@ Pipe createMergeTreeSequentialSource( MergeTreeData::DataPartPtr data_part, Names columns_to_read, std::optional mark_ranges, + std::shared_ptr> filtered_rows_count, bool apply_deleted_mask, bool read_with_direct_io, - bool take_column_types_from_storage, - bool quiet, - std::shared_ptr> filtered_rows_count) + bool prefetch) { /// The part might have some rows masked by lightweight deletes @@ -327,7 +317,7 @@ Pipe createMergeTreeSequentialSource( auto column_part_source = std::make_shared(type, storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), - /*apply_deleted_mask=*/ false, read_with_direct_io, take_column_types_from_storage, quiet); + /*apply_deleted_mask=*/ false, read_with_direct_io, prefetch); Pipe pipe(std::move(column_part_source)); @@ -391,7 +381,13 @@ public: if (!key_condition.alwaysFalse()) mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange( - data_part, metadata_snapshot, key_condition, {}, context->getSettingsRef(), log); + data_part, + metadata_snapshot, + key_condition, + /*part_offset_condition=*/{}, + /*exact_ranges=*/nullptr, + context->getSettingsRef(), + log); if (mark_ranges && mark_ranges->empty()) { @@ -406,11 +402,10 @@ public: data_part, columns_to_read, std::move(mark_ranges), + /*filtered_rows_count=*/ nullptr, apply_deleted_mask, /*read_with_direct_io=*/ false, - /*take_column_types_from_storage=*/ true, - /*quiet=*/ false, - /*filtered_rows_count=*/ nullptr); + /*prefetch=*/ false); pipeline.init(Pipe(std::move(source))); } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index a5e36a7726f..e6f055f776c 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -23,11 +23,10 @@ Pipe createMergeTreeSequentialSource( MergeTreeData::DataPartPtr data_part, Names columns_to_read, std::optional mark_ranges, + std::shared_ptr> filtered_rows_count, bool apply_deleted_mask, bool read_with_direct_io, - bool take_column_types_from_storage, - bool quiet, - std::shared_ptr> filtered_rows_count); + bool prefetch); class QueryPlan; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a00508fd1c1..026a1da7196 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -35,7 +35,7 @@ struct Settings; M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. 
If >= 1, columns will be always written in full serialization.", 0) \ - M(Bool, replace_long_file_name_to_hash, false, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ + M(Bool, replace_long_file_name_to_hash, true, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ M(UInt64, max_file_name_length, 127, "The maximal length of the file name to keep it as is without hashing", 0) \ M(UInt64, min_bytes_for_full_part_storage, 0, "Only available in ClickHouse Cloud", 0) \ M(UInt64, min_rows_for_full_part_storage, 0, "Only available in ClickHouse Cloud", 0) \ @@ -148,6 +148,7 @@ struct Settings; M(UInt64, vertical_merge_algorithm_min_rows_to_activate, 16 * 8192, "Minimal (approximate) sum of rows in merging parts to activate Vertical merge algorithm.", 0) \ M(UInt64, vertical_merge_algorithm_min_bytes_to_activate, 0, "Minimal (approximate) uncompressed size in bytes in merging parts to activate Vertical merge algorithm.", 0) \ M(UInt64, vertical_merge_algorithm_min_columns_to_activate, 11, "Minimal amount of non-PK columns to activate Vertical merge algorithm.", 0) \ + M(Bool, vertical_merge_remote_filesystem_prefetch, true, "If true prefetching of data from remote filesystem is used for the next column during merge", 0) \ M(UInt64, max_postpone_time_for_failed_mutations_ms, 5ULL * 60 * 1000, "The maximum postpone time for failed mutations.", 0) \ \ /** Compatibility settings */ \ @@ -198,6 +199,7 @@ struct Settings; M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ + M(Bool, allow_experimental_optimized_row_order, false, "Allow reshuffling of rows during part inserts and merges to improve the compressibility of the new part", 0) \ \ /** Compress marks and primary key. */ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index 02b4768f5f2..fcf2dd76e3f 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -11,7 +11,7 @@ namespace DB struct MergeTreeSource::AsyncReadingState { /// NotStarted -> InProgress -> IsFinished -> NotStarted ... - enum class Stage + enum class Stage : uint8_t { NotStarted, InProgress, diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 6f1c5302b0e..3844ac18268 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -261,9 +261,9 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree cond.columns_size = getColumnsSize(cond.table_columns); cond.viable = - !has_invalid_column && + !has_invalid_column /// Condition depend on some column. Constant expressions are not moved. - !cond.table_columns.empty() + && !cond.table_columns.empty() && !cannotBeMoved(node, where_optimizer_context) /// When use final, do not take into consideration the conditions with non-sorting keys. 
Because final select /// need to use all sorting keys, it will cause correctness issues if we filter other columns before final merge. @@ -273,17 +273,15 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree /// Do not move conditions involving all queried columns. && cond.table_columns.size() < queried_columns.size(); - if (cond.viable) - cond.good = isConditionGood(node, table_columns); - if (where_optimizer_context.use_statistic) { cond.good = cond.viable; - cond.selectivity = estimator.estimateSelectivity(node); - - if (node.getASTNode() != nullptr) - LOG_TEST(log, "Condition {} has selectivity {}", node.getASTNode()->dumpTree(), cond.selectivity); + LOG_TEST(log, "Condition {} has selectivity {}", node.getColumnName(), cond.selectivity); + } + else if (cond.viable) + { + cond.good = isConditionGood(node, table_columns); } if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) @@ -363,6 +361,7 @@ std::optional MergeTreeWhereOptimizer:: /// Move condition and all other conditions depend on the same set of columns. auto move_condition = [&](Conditions::iterator cond_it) { + LOG_TRACE(log, "Condition {} moved to PREWHERE", cond_it->node.getColumnName()); prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, cond_it); total_size_of_moved_conditions += cond_it->columns_size; total_number_of_moved_columns += cond_it->table_columns.size(); @@ -371,9 +370,14 @@ std::optional MergeTreeWhereOptimizer:: for (auto jt = where_conditions.begin(); jt != where_conditions.end();) { if (jt->viable && jt->columns_size == cond_it->columns_size && jt->table_columns == cond_it->table_columns) + { + LOG_TRACE(log, "Condition {} moved to PREWHERE", jt->node.getColumnName()); prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, jt++); + } else + { ++jt; + } } }; diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h index ce9a40c5931..18f289531a5 100644 --- a/src/Storages/MergeTree/MergeType.h +++ b/src/Storages/MergeTree/MergeType.h @@ -11,7 +11,7 @@ namespace DB /// ReplicatedMergeTreeLogEntry. /// /// Order is important, don't try to change it. 
-enum class MergeType +enum class MergeType : uint8_t { /// Just regular merge Regular = 1, diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 9f641fd8eb5..c5799fab09f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -21,35 +22,39 @@ MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeIndices & skip_indices, const Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_, bool blocks_are_granules_size, const WriteSettings & write_settings_, const MergeTreeIndexGranularity & computed_index_granularity) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) , write_settings(write_settings_) { MergeTreeWriterSettings writer_settings( - storage.getContext()->getSettings(), + data_part->storage.getContext()->getSettings(), write_settings, - storage.getSettings(), + storage_settings, data_part->index_granularity_info.mark_type.adaptive, /* rewrite_primary_key = */ true, blocks_are_granules_size); + /// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); - /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; /// NOTE do not pass context for writing to system.transactions_info_log, /// because part may have temporary name (with temporary block numbers). Will write it later. data_part->version.setCreationTID(tid, nullptr); data_part->storeVersionMetadata(); - writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); + writer = createMergeTreeDataPartWriter(data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, + columns_list, data_part->getColumnPositions(), metadata_snapshot, data_part->storage.getVirtualsPtr(), + skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } /// If data is pre-sorted. 
@@ -181,7 +186,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); - new_part->setIndex(std::make_shared(writer->releaseIndexColumns())); + new_part->setIndex(writer->releaseIndexColumns()); new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); @@ -208,7 +213,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (new_part->isProjectionPart()) { - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { auto count_out = new_part->getDataPartStorage().writeFile("count.txt", 4096, write_settings); HashingWriteBuffer count_out_hashing(*count_out); @@ -234,14 +239,16 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis written_files.emplace_back(std::move(out)); } - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - if (auto file = new_part->partition.store(storage, new_part->getDataPartStorage(), checksums)) + if (auto file = new_part->partition.store( + new_part->storage.getInMemoryMetadataPtr(), new_part->storage.getContext(), + new_part->getDataPartStorage(), checksums)) written_files.emplace_back(std::move(file)); if (new_part->minmax_idx->initialized) { - auto files = new_part->minmax_idx->store(storage, new_part->getDataPartStorage(), checksums); + auto files = new_part->minmax_idx->store(new_part->storage, new_part->getDataPartStorage(), checksums); for (auto & file : files) written_files.emplace_back(std::move(file)); } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 540b3b3bffa..c1e3d75fefc 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -22,7 +22,7 @@ public: const MergeTreeIndices & skip_indices, const Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_ = false, bool blocks_are_granules_size = false, const WriteSettings & write_settings = {}, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 728b2e38833..674a9bd498f 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -20,11 +20,10 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); - const auto & storage_settings = data_part->storage.getSettings(); MergeTreeWriterSettings 
writer_settings( global_settings, @@ -33,11 +32,18 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( index_granularity_info ? index_granularity_info->mark_type.adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */ false); - writer = data_part->getWriter( + writer = createMergeTreeDataPartWriter( + data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, header.getNamesAndTypesList(), + data_part->getColumnPositions(), metadata_snapshot_, + data_part->storage.getVirtualsPtr(), indices_to_recalc, stats_to_recalc_, + data_part->getMarksFileExtension(), default_codec, writer_settings, index_granularity); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 3415b08cebb..8d40658bb2c 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -206,6 +206,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); task_context->setCurrentQueryId(getQueryId()); + task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); merge_mutate_entry = storage.getContext()->getMergeList().insert( storage.getStorageID(), diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 0b19aebe36d..2fd02708421 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -139,6 +139,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const context->makeQueryContext(); auto queryId = getQueryId(); context->setCurrentQueryId(queryId); + context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); return context; } diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h index ef11780a873..a5de3cc9ded 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h @@ -49,7 +49,7 @@ private: void prepare(); - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_EXECUTE, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index e11ff87d1b2..1828b8a7eeb 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -60,6 +61,21 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } +static bool haveMutationsOfDynamicColumns(const MergeTreeData::DataPartPtr & data_part, const MutationCommands & commands) +{ + for (const auto & command : commands) + { + if (!command.column_name.empty()) + { + auto column = data_part->tryGetColumn(command.column_name); + if (column && column->type->hasDynamicSubcolumns()) + return true; + } + } + + return false; +} + static UInt64 getExistingRowsCount(const Block & block) { auto column = block.getByName(RowExistsColumn::name).column; @@ -95,7 +111,7 @@ static void splitAndModifyMutationCommands( auto part_columns = part->getColumnsDescription(); const auto & table_columns = metadata_snapshot->getColumns(); - if (!isWidePart(part) || 
!isFullPartStorage(part->getDataPartStorage())) + if (haveMutationsOfDynamicColumns(part, commands) || !isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { NameSet mutated_columns; NameSet dropped_columns; @@ -652,7 +668,7 @@ static NameSet collectFilesToSkip( files_to_skip.insert(index->getFileName() + index->getSerializedFileExtension()); files_to_skip.insert(index->getFileName() + mrk_extension); - // Skip all inverted index files, for they will be rebuilt + // Skip all full-text index files, for they will be rebuilt if (dynamic_cast(index.get())) { auto index_filename = index->getFileName(); @@ -731,7 +747,7 @@ static NameToNameVector collectFilesForRenames( if (command.type == MutationCommand::Type::DROP_INDEX) { static const std::array suffixes = {".idx2", ".idx"}; - static const std::array gin_suffixes = {".gin_dict", ".gin_post", ".gin_seg", ".gin_sid"}; /// .gin_* is inverted index + static const std::array gin_suffixes = {".gin_dict", ".gin_post", ".gin_seg", ".gin_sid"}; /// .gin_* means generalized inverted index (aka. full-text-index) for (const auto & suffix : suffixes) { @@ -935,7 +951,7 @@ void finalizeMutatedPart( new_data_part->rows_count = source_part->rows_count; new_data_part->index_granularity = source_part->index_granularity; - new_data_part->setIndex(source_part->getIndex()); + new_data_part->setIndex(*source_part->getIndex()); new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); @@ -1233,7 +1249,7 @@ private: void constructTaskForProjectionPartsMerge(); void finalize(); - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_MUTATE_ORIGINAL_PART, @@ -1660,7 +1676,7 @@ private: skip_indices, stats_to_rewrite, ctx->compression_codec, - ctx->txn, + ctx->txn ? ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, /*blocks_are_granules_size=*/ false, ctx->context->getWriteSettings(), @@ -1689,7 +1705,7 @@ private: ctx->out.reset(); } - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_EXECUTE, @@ -1938,8 +1954,7 @@ private: MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context, ctx->metadata_snapshot, ctx->need_sync); } - - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_EXECUTE, @@ -2146,8 +2161,8 @@ bool MutateTask::prepare() scope_guard lock; { - std::tie(part, lock) = ctx->data->cloneAndLoadDataPartOnSameDisk( - ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params, ctx->context->getReadSettings(), ctx->context->getWriteSettings()); + std::tie(part, lock) = ctx->data->cloneAndLoadDataPart( + ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params, ctx->context->getReadSettings(), ctx->context->getWriteSettings(), true/*must_on_same_disk*/); part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); } @@ -2250,7 +2265,9 @@ bool MutateTask::prepare() /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data - if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) + /// Also currently mutations of types with dynamic subcolumns in Wide part are possible only by + /// rewriting the whole part. 
+ if (MutationHelpers::haveMutationsOfDynamicColumns(ctx->source_part, ctx->commands_for_part) || !isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) || (ctx->interpreter && ctx->interpreter->isAffectingAllColumns())) { /// In case of replicated merge tree with zero copy replication diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index dc21df018d7..dc22b90f0e9 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -51,7 +51,7 @@ private: bool prepare(); - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_EXECUTE diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 7d9691b847d..f3318a48883 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -167,6 +167,7 @@ public: Stats stats; size_t replicas_count{0}; size_t unavailable_replicas_count{0}; + size_t sent_initial_requests{0}; ProgressCallback progress_callback; explicit ImplInterface(size_t replicas_count_) @@ -177,9 +178,17 @@ public: virtual ~ImplInterface() = default; virtual ParallelReadResponse handleRequest(ParallelReadRequest request) = 0; - virtual void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) = 0; + virtual void doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) = 0; virtual void markReplicaAsUnavailable(size_t replica_number) = 0; + void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) + { + if (++sent_initial_requests > replicas_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Initiator received more initial requests than there are replicas"); + + doHandleInitialAllRangesAnnouncement(std::move(announcement)); + } + void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } }; @@ -215,7 +224,7 @@ public: ParallelReadResponse handleRequest(ParallelReadRequest request) override; - void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override; + void doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override; void markReplicaAsUnavailable(size_t replica_number) override; @@ -223,7 +232,6 @@ private: /// This many granules will represent a single segment of marks that will be assigned to a replica const size_t mark_segment_size{0}; - size_t sent_initial_requests{0}; bool state_initialized{false}; size_t finished_replicas{0}; @@ -291,7 +299,7 @@ private: void setProgressCallback(); - enum class ScanMode + enum class ScanMode : uint8_t { /// Main working set for the replica TakeWhatsMineByHash, @@ -422,7 +430,7 @@ void DefaultCoordinator::setProgressCallback() } } -void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) +void DefaultCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) { const auto replica_num = announcement.replica_num; @@ -437,10 +445,9 @@ void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnno ++stats[replica_num].number_of_requests; replica_status[replica_num].is_announcement_received = true; - ++sent_initial_requests; LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count); - if (sent_initial_requests == replicas_count) + if (sent_initial_requests == 
replicas_count - unavailable_replicas_count) setProgressCallback(); /// Sift the queue to move out all invisible segments @@ -781,6 +788,11 @@ ParallelReadResponse DefaultCoordinator::handleRequest(ParallelReadRequest reque { /// Nobody will come to process any more data + for (const auto & part : all_parts_to_read) + if (!part.description.ranges.empty()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Some segments were left unread for the part {}", part.description.describe()); + if (!ranges_for_stealing_queue.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Some orphaned segments were left unread"); @@ -818,7 +830,7 @@ public: } ParallelReadResponse handleRequest([[ maybe_unused ]] ParallelReadRequest request) override; - void handleInitialAllRangesAnnouncement([[ maybe_unused ]] InitialAllRangesAnnouncement announcement) override; + void doHandleInitialAllRangesAnnouncement([[maybe_unused]] InitialAllRangesAnnouncement announcement) override; void markReplicaAsUnavailable(size_t replica_number) override; Parts all_parts_to_read; @@ -840,7 +852,7 @@ void InOrderCoordinator::markReplicaAsUnavailable(size_t replica_number) } template -void InOrderCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) +void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) { LOG_TRACE(log, "Received an announcement {}", announcement.describe()); @@ -981,12 +993,9 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init std::lock_guard lock(mutex); if (!pimpl) - { - mode = announcement.mode; - initialize(); - } + initialize(announcement.mode); - return pimpl->handleInitialAllRangesAnnouncement(std::move(announcement)); + pimpl->handleInitialAllRangesAnnouncement(std::move(announcement)); } ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request) @@ -996,10 +1005,7 @@ ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelR std::lock_guard lock(mutex); if (!pimpl) - { - mode = request.mode; - initialize(); - } + initialize(request.mode); const auto replica_num = request.replica_num; auto response = pimpl->handleRequest(std::move(request)); @@ -1024,7 +1030,7 @@ void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica pimpl->markReplicaAsUnavailable(replica_number); } -void ParallelReplicasReadingCoordinator::initialize() +void ParallelReplicasReadingCoordinator::initialize(CoordinationMode mode) { switch (mode) { diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 9cba7d8e8c2..60343988f03 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -31,12 +31,11 @@ public: void setProgressCallback(ProgressCallback callback); private: - void initialize(); + void initialize(CoordinationMode mode); std::mutex mutex; size_t replicas_count{0}; size_t mark_segment_size{0}; - CoordinationMode mode{CoordinationMode::Default}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation std::set replicas_used; diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 78fcfabb704..4228d7b70b6 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ 
b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -616,8 +616,6 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st } } } - - UNREACHABLE(); } void PartMovesBetweenShardsOrchestrator::removePins(const Entry & entry, zkutil::ZooKeeperPtr zk) diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index eb51d600da3..9de7b238f57 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -4,27 +4,10 @@ namespace DB { -namespace -{ - -KeyCondition buildKeyCondition(const KeyDescription & partition_key, const SelectQueryInfo & query_info, ContextPtr context, bool strict) -{ - return {query_info.filter_actions_dag, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict}; -} - -} - -PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, const SelectQueryInfo & query_info, ContextPtr context, bool strict) - : partition_key(MergeTreePartition::adjustPartitionKey(metadata, context)) - , partition_condition(buildKeyCondition(partition_key, query_info, context, strict)) - , useless(strict ? partition_condition.anyUnknownOrAlwaysTrue() : partition_condition.alwaysUnknownOrTrue()) -{ -} - PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict) : partition_key(MergeTreePartition::adjustPartitionKey(metadata, context)) - , partition_condition(filter_actions_dag, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict) - , useless(strict ? partition_condition.anyUnknownOrAlwaysTrue() : partition_condition.alwaysUnknownOrTrue()) + , partition_condition(filter_actions_dag, context, partition_key.column_names, partition_key.expression, true /* single_point */) + , useless((strict && partition_condition.isRelaxed()) || partition_condition.alwaysUnknownOrTrue()) { } diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index e8a740b1524..ca24559ca01 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -13,8 +13,7 @@ namespace DB class PartitionPruner { public: - PartitionPruner(const StorageMetadataPtr & metadata, const SelectQueryInfo & query_info, ContextPtr context, bool strict); - PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict); + PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict = false); bool canBePruned(const IMergeTreeDataPart & part) const; diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index dc8c6b0c230..4a18d606bb7 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -424,9 +424,9 @@ RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const // because they are used only for index analysis. 
if (dag_node->function_base->getName() == "indexHint") { - const auto * adaptor = typeid_cast(dag_node->function_base.get()); - const auto * index_hint = typeid_cast(adaptor->getFunction().get()); - return RPNBuilderTreeNode(index_hint->getActions()->getOutputs()[index], tree_context); + const auto & adaptor = typeid_cast(*dag_node->function_base); + const auto & index_hint = typeid_cast(*adaptor.getFunction()); + return RPNBuilderTreeNode(index_hint.getActions()->getOutputs()[index], tree_context); } return RPNBuilderTreeNode(dag_node->children[index], tree_context); diff --git a/src/Storages/MergeTree/RangesInDataPart.h b/src/Storages/MergeTree/RangesInDataPart.h index e275f2c27e7..bf9e4c7dfb2 100644 --- a/src/Storages/MergeTree/RangesInDataPart.h +++ b/src/Storages/MergeTree/RangesInDataPart.h @@ -45,6 +45,7 @@ struct RangesInDataPart AlterConversionsPtr alter_conversions; size_t part_index_in_query; MarkRanges ranges; + MarkRanges exact_ranges; RangesInDataPart() = default; diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h index 2b1fcec62a8..79290c3939a 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h @@ -72,7 +72,7 @@ protected: ContextMutablePtr task_context; private: - enum class CheckExistingPartResult + enum class CheckExistingPartResult : uint8_t { PART_EXISTS, OK @@ -81,7 +81,7 @@ private: CheckExistingPartResult checkExistingPart(); bool executeImpl(); - enum class State + enum class State : uint8_t { NEED_PREPARE, NEED_EXECUTE_INNER_MERGE, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 67942491ae2..e034918ef57 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -79,10 +79,7 @@ void ReplicatedMergeTreeCleanupThread::run() else sleep_ms = static_cast(sleep_ms / ratio); - if (sleep_ms < storage_settings->cleanup_delay_period * 1000) - sleep_ms = storage_settings->cleanup_delay_period * 1000; - if (storage_settings->max_cleanup_delay_period * 1000 < sleep_ms) - sleep_ms = storage_settings->max_cleanup_delay_period * 1000; + sleep_ms = std::clamp(sleep_ms, storage_settings->cleanup_delay_period * 1000, storage_settings->max_cleanup_delay_period * 1000); UInt64 interval_ms = now_ms - prev_timestamp; LOG_TRACE(log, "Scheduling next cleanup after {}ms (points: {}, interval: {}ms, ratio: {}, points per minute: {})", diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 9eb8b6ce24c..08ddbbd60c4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -28,7 +28,7 @@ enum FormatVersion : UInt8 FORMAT_WITH_DEDUPLICATE_BY_COLUMNS = 6, FORMAT_WITH_LOG_ENTRY_ID = 7, - FORMAT_LAST + FORMAT_LAST = 8, }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 7693f34cc1e..7ff37c609eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -9,7 +10,6 @@ #include #include -#include #include @@ -174,7 +174,7 @@ struct ReplicatedMergeTreeLogEntryData size_t quorum = 0; /// Used only in tests for permanent fault 
injection for particular queue entry. - bool fault_injected = false; + CopyableAtomic fault_injected{false}; /// If this MUTATE_PART entry caused by alter(modify/drop) query. bool isAlterMutation() const diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp index 24d907dbad6..9aadc3c3ca7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d6c36d12bf5..9a368bd44f5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 86b28f72e19..4b4f4c33e7d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -113,7 +113,7 @@ namespace inline String toString(const std::vector & vec) { size_t size = vec.size(); - if (size > 50) size = 50; + size = std::min(size, 50); return fmt::format("({})", fmt::join(vec.begin(), vec.begin() + size, ",")); } } diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index 3a5bfde6c20..5f5516a6804 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include diff --git a/src/Storages/MergeTree/RowOrderOptimizer.cpp b/src/Storages/MergeTree/RowOrderOptimizer.cpp new file mode 100644 index 00000000000..34f9fed4500 --- /dev/null +++ b/src/Storages/MergeTree/RowOrderOptimizer.cpp @@ -0,0 +1,184 @@ +#include + +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace +{ + +/// Do the left and right row contain equal values in the sorting key columns (usually the primary key columns) +bool haveEqualSortingKeyValues(const Block & block, const SortDescription & sort_description, size_t left_row, size_t right_row) +{ + for (const auto & sort_column : sort_description) + { + const String & sort_col = sort_column.column_name; + const IColumn & column = *block.getByName(sort_col).column; + if (column.compareAt(left_row, right_row, column, 1) != 0) + return false; + } + return true; +} + +/// Returns the sorted indexes of all non-sorting-key columns. 
+std::vector getOtherColumnIndexes(const Block & block, const SortDescription & sort_description) +{ + const size_t sorting_key_columns_count = sort_description.size(); + const size_t all_columns_count = block.columns(); + + std::vector other_column_indexes; + other_column_indexes.reserve(all_columns_count - sorting_key_columns_count); + + if (sorting_key_columns_count == 0) + { + other_column_indexes.resize(block.columns()); + iota(other_column_indexes.begin(), other_column_indexes.end(), 0); + } + else + { + std::vector sorted_column_indexes; + sorted_column_indexes.reserve(sorting_key_columns_count); + for (const SortColumnDescription & sort_column : sort_description) + { + size_t idx = block.getPositionByName(sort_column.column_name); + sorted_column_indexes.emplace_back(idx); + } + ::sort(sorted_column_indexes.begin(), sorted_column_indexes.end()); + + std::vector all_column_indexes(all_columns_count); + std::iota(all_column_indexes.begin(), all_column_indexes.end(), 0); + std::set_difference( + all_column_indexes.begin(), + all_column_indexes.end(), + sorted_column_indexes.begin(), + sorted_column_indexes.end(), + std::back_inserter(other_column_indexes)); + } + chassert(other_column_indexes.size() == all_columns_count - sorting_key_columns_count); + return other_column_indexes; +} + +/// Returns a set of equal row ranges (equivalence classes) with the same row values for all sorting key columns (usually primary key columns.) +/// Example with 2 PK columns, 2 other columns --> 3 equal ranges +/// pk1 pk2 c1 c2 +/// ---------------------- +/// 1 1 a b +/// 1 1 b e +/// -------- +/// 1 2 e a +/// 1 2 d c +/// 1 2 e a +/// -------- +/// 2 1 a 3 +/// ---------------------- +EqualRanges getEqualRanges(const Block & block, const SortDescription & sort_description, const IColumn::Permutation & permutation, const LoggerPtr & log) +{ + LOG_TRACE(log, "Finding equal ranges"); + EqualRanges ranges; + const size_t rows = block.rows(); + if (sort_description.empty()) + { + ranges.push_back({0, rows}); + } + else + { + for (size_t i = 0; i < rows;) + { + size_t j = i; + while (j < rows && haveEqualSortingKeyValues(block, sort_description, permutation[i], permutation[j])) + ++j; + ranges.push_back({i, j}); + i = j; + } + } + return ranges; +} + +std::vector getCardinalitiesInPermutedRange( + const Block & block, + const std::vector & other_column_indexes, + const IColumn::Permutation & permutation, + const EqualRange & equal_range) +{ + std::vector cardinalities(other_column_indexes.size()); + for (size_t i = 0; i < other_column_indexes.size(); ++i) + { + const size_t column_id = other_column_indexes[i]; + const ColumnPtr & column = block.getByPosition(column_id).column; + cardinalities[i] = column->estimateCardinalityInPermutedRange(permutation, equal_range); + } + return cardinalities; +} + +void updatePermutationInEqualRange( + const Block & block, + const std::vector & other_column_indexes, + IColumn::Permutation & permutation, + const EqualRange & equal_range, + const std::vector & cardinalities) +{ + LoggerPtr log = getLogger("RowOrderOptimizer"); + + LOG_TRACE(log, "Starting optimization in equal range"); + + std::vector column_order(other_column_indexes.size()); + iota(column_order.begin(), column_order.end(), 0); + auto cmp = [&](size_t lhs, size_t rhs) -> bool { return cardinalities[lhs] < cardinalities[rhs]; }; + stable_sort(column_order.begin(), column_order.end(), cmp); + + std::vector ranges = {equal_range}; + LOG_TRACE(log, "equal_range: .from: {}, .to: {}", equal_range.from, 
equal_range.to); + for (size_t i : column_order) + { + const size_t column_id = other_column_indexes[i]; + const ColumnPtr & column = block.getByPosition(column_id).column; + LOG_TRACE(log, "i: {}, column_id: {}, column->getName(): {}, cardinality: {}", i, column_id, column->getName(), cardinalities[i]); + column->updatePermutation( + IColumn::PermutationSortDirection::Ascending, IColumn::PermutationSortStability::Stable, 0, 1, permutation, ranges); + } + + LOG_TRACE(log, "Finish optimization in equal range"); +} + +} + +void RowOrderOptimizer::optimize(const Block & block, const SortDescription & sort_description, IColumn::Permutation & permutation) +{ + LoggerPtr log = getLogger("RowOrderOptimizer"); + + LOG_TRACE(log, "Starting optimization"); + + if (block.columns() == 0) + return; /// a table without columns, this should not happen in the first place ... + + if (permutation.empty()) + { + const size_t rows = block.rows(); + permutation.resize(rows); + iota(permutation.data(), rows, IColumn::Permutation::value_type(0)); + } + + const EqualRanges equal_ranges = getEqualRanges(block, sort_description, permutation, log); + const std::vector other_columns_indexes = getOtherColumnIndexes(block, sort_description); + + LOG_TRACE(log, "block.columns(): {}, block.rows(): {}, sort_description.size(): {}, equal_ranges.size(): {}", block.columns(), block.rows(), sort_description.size(), equal_ranges.size()); + + for (const auto & equal_range : equal_ranges) + { + if (equal_range.size() <= 1) + continue; + const std::vector cardinalities = getCardinalitiesInPermutedRange(block, other_columns_indexes, permutation, equal_range); + updatePermutationInEqualRange(block, other_columns_indexes, permutation, equal_range, cardinalities); + } + + LOG_TRACE(log, "Finished optimization"); +} + +} diff --git a/src/Storages/MergeTree/RowOrderOptimizer.h b/src/Storages/MergeTree/RowOrderOptimizer.h new file mode 100644 index 00000000000..f321345c3e4 --- /dev/null +++ b/src/Storages/MergeTree/RowOrderOptimizer.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class RowOrderOptimizer +{ +public: + /// Given the columns in a Block with a sub-set of them as sorting key columns (usually primary key columns --> SortDescription), and a + /// permutation of the rows, this function tries to "improve" the permutation such that the data can be compressed better by generic + /// compression algorithms such as zstd. The heuristic is based on D. Lemire, O. Kaser (2011): Reordering columns for smaller + /// indexes, https://doi.org/10.1016/j.ins.2011.02.002 + /// The algorithm works like this: + /// - Divide the sorting key columns horizontally into "equal ranges". An equal range is defined by the same sorting key values on all + /// of its rows. We can re-shuffle the non-sorting-key values within each equal range freely. + /// - Determine (estimate) for each equal range the cardinality of each non-sorting-key column. + /// - The simple heuristic applied is that non-sorting key columns will be sorted (within each equal range) in order of ascending + /// cardinality. This maximizes the length of equal-value runs within the non-sorting-key columns, leading to better compressibility.
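A hypothetical worked example of the heuristic above (illustration only, not part of the patch): assume one sorting key column pk and two other columns c1 (four distinct values) and c2 (two distinct values) within a single equal range; reordering the rows of the range by the lower-cardinality column first produces longer equal-value runs:

/// pk c1 c2              pk c1 c2
/// 1  d  x               1  c  x
/// 1  a  y     -->       1  d  x
/// 1  c  x               1  a  y
/// 1  b  y               1  b  y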
+ static void optimize(const Block & block, const SortDescription & sort_description, IColumn::Permutation & permutation); +}; + +} diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index ca8ed9abdb5..a94508ad41f 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -87,6 +87,7 @@ public: bool supportsPrewhere() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index b4d32e71d0d..525960d5314 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -219,7 +219,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( auto file_name = *stream_name + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - }); + }, column.type, data_part->getColumnSample(column)); } } else @@ -250,7 +250,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( continue; } - /// Exclude files written by inverted index from check. No correct checksums are available for them currently. + /// Exclude files written by full-text index from check. No correct checksums are available for them currently. if (isGinFile(file_name)) continue; @@ -377,7 +377,16 @@ IMergeTreeDataPart::Checksums checkDataPart( auto file_name = it->name(); if (!data_part_storage.isDirectory(file_name)) { - auto remote_path = data_part_storage.getRemotePath(file_name); + const bool is_projection_part = data_part->isProjectionPart(); + auto remote_path = data_part_storage.getRemotePath(file_name, /* if_exists */is_projection_part); + if (remote_path.empty()) + { + chassert(is_projection_part); + throw Exception( + ErrorCodes::BROKEN_PROJECTION, + "Remote path for {} does not exist for projection path. Projection {} is broken", + file_name, data_part->name); + } cache.removePathIfExists(remote_path, FileCache::getCommonUser().user_id); } } diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp b/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp deleted file mode 100644 index 8ea732b0243..00000000000 --- a/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) -{ - const auto * create = create_query.as(); - if (!create || !create->storage || !create->storage->engine) - return {}; - - /// Check if the table engine is one of the ReplicatedMergeTree family. - const auto & ast_engine = *create->storage->engine; - if (!ast_engine.name.starts_with("Replicated") || !ast_engine.name.ends_with("MergeTree")) - return {}; - - /// Get the first argument. - const auto * ast_arguments = typeid_cast(ast_engine.arguments.get()); - if (!ast_arguments || ast_arguments->children.empty()) - return {}; - - auto * ast_zk_path = typeid_cast(ast_arguments->children[0].get()); - if (!ast_zk_path || (ast_zk_path->value.getType() != Field::Types::String)) - return {}; - - String zk_path = ast_zk_path->value.safeGet(); - - /// Expand macros. 
- Macros::MacroExpansionInfo info; - info.table_id.table_name = create->getTable(); - info.table_id.database_name = create->getDatabase(); - info.table_id.uuid = create->uuid; - auto database = DatabaseCatalog::instance().tryGetDatabase(info.table_id.database_name); - if (database && database->getEngineName() == "Replicated") - { - info.shard = getReplicatedDatabaseShardName(database); - info.replica = getReplicatedDatabaseReplicaName(database); - } - - try - { - zk_path = global_context->getMacros()->expand(zk_path, info); - } - catch (...) - { - return {}; /// Couldn't expand macros. - } - - return zk_path; -} - -} diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.h b/src/Storages/MergeTree/extractZkPathFromCreateQuery.h deleted file mode 100644 index e22f76d2cd5..00000000000 --- a/src/Storages/MergeTree/extractZkPathFromCreateQuery.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ -class IAST; -class Context; -using ContextPtr = std::shared_ptr; - -/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if fails. -/// The function takes the first argument of the ReplicatedMergeTree table engine and expands macros in it. -/// It works like a part of what the create() function in registerStorageMergeTree.cpp does but in a simpler manner. -std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); - -} diff --git a/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h new file mode 100644 index 00000000000..5ef5e1db62e --- /dev/null +++ b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTCreateQuery; +class Context; +using ContextPtr = std::shared_ptr; + +/// Extracts a zookeeper path from a specified CREATE TABLE query. +/// The function checks the table engine and if it is Replicated*MergeTree then it takes the first argument and expands macros in it. +/// Returns std::nullopt if the specified CREATE query doesn't describe a Replicated table or its arguments can't be evaluated. +std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & create_query, const ContextPtr & context); + +} diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index d552a4b6fa5..d234103e52b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -13,7 +14,6 @@ #include #include -#include #include #include @@ -31,7 +31,6 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } @@ -112,16 +111,252 @@ static ColumnsDescription getColumnsDescriptionFromZookeeper(const String & raw_ return ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_path) / "columns", &columns_stat)); } -static void verifySortingKey(const KeyDescription & sorting_key) +/// Returns whether a new syntax is used to define a table engine, i.e. MergeTree() PRIMARY KEY ... PARTITION BY ... SETTINGS ... +/// instead of MergeTree(MergeTree(date, [sample_key], primary_key). 
+static bool isExtendedStorageDef(const ASTCreateQuery & query) { - /// Aggregate functions already forbidden, but SimpleAggregateFunction are not - for (const auto & data_type : sorting_key.data_types) + if (query.storage && query.storage->isExtendedStorageDefinition()) + return true; + + if (query.columns_list && + ((query.columns_list->indices && !query.columns_list->indices->children.empty()) || + (query.columns_list->projections && !query.columns_list->projections->children.empty()))) { - if (dynamic_cast(data_type->getCustomName())) - throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); + return true; + } + + return false; +} + +/// Evaluates expressions in engine arguments. +/// In new syntax an argument can be literal or identifier or array/tuple of identifiers. +static void evaluateEngineArgs(ASTs & engine_args, const ContextPtr & context) +{ + size_t arg_idx = 0; + try + { + for (; arg_idx < engine_args.size(); ++arg_idx) + { + auto & arg = engine_args[arg_idx]; + auto * arg_func = arg->as(); + if (!arg_func) + continue; + + /// If we got ASTFunction, let's evaluate it and replace with ASTLiteral. + /// Do not try evaluate array or tuple, because it's array or tuple of column identifiers. + if (arg_func->name == "array" || arg_func->name == "tuple") + continue; + Field value = evaluateConstantExpression(arg, context).first; + arg = std::make_shared(value); + } + } + catch (Exception & e) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot evaluate engine argument {}: {} {}", + arg_idx, e.message(), verbose_help_message); } } +/// Returns whether this is a Replicated table engine? +static bool isReplicated(const String & engine_name) +{ + return engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree"); +} + +/// Returns the part of the name of a table engine between "Replicated" (if any) and "MergeTree". +static std::string_view getNamePart(const String & engine_name) +{ + std::string_view name_part = engine_name; + if (name_part.starts_with("Replicated")) + name_part.remove_prefix(strlen("Replicated")); + + if (name_part.ends_with("MergeTree")) + name_part.remove_suffix(strlen("MergeTree")); + + return name_part; +} + +/// Extracts zookeeper path and replica name from the table engine's arguments. +/// The function can modify those arguments (that's why they're passed separately in `engine_args`) and also determines RenamingRestrictions. +/// The function assumes the table engine is Replicated. +static void extractZooKeeperPathAndReplicaNameFromEngineArgs( + const ASTCreateQuery & query, + const StorageID & table_id, + const String & engine_name, + ASTs & engine_args, + LoadingStrictnessLevel mode, + const ContextPtr & context, + String & zookeeper_path, + String & replica_name, + RenamingRestrictions & renaming_restrictions) +{ + chassert(isReplicated(engine_name)); + + zookeeper_path = ""; + replica_name = ""; + renaming_restrictions = RenamingRestrictions::ALLOW_ANY; + + bool is_extended_storage_def = isExtendedStorageDef(query); + + if (is_extended_storage_def) + { + /// Allow expressions in engine arguments. + /// In new syntax argument can be literal or identifier or array/tuple of identifiers. 
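+        /// (evaluateEngineArgs() above folds such constant expressions into literals and leaves
+        /// arrays/tuples of column identifiers untouched.)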
+ evaluateEngineArgs(engine_args, context); + } + + bool is_on_cluster = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + bool is_replicated_database = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + DatabaseCatalog::instance().getDatabase(table_id.database_name)->getEngineName() == "Replicated"; + + /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries + /// and if UUID was explicitly passed in CREATE TABLE (like for ATTACH) + bool allow_uuid_macro = is_on_cluster || is_replicated_database || query.attach || query.has_uuid; + + auto expand_macro = [&] (ASTLiteral * ast_zk_path, ASTLiteral * ast_replica_name) + { + /// Unfold {database} and {table} macro on table creation, so table can be renamed. + if (mode < LoadingStrictnessLevel::ATTACH) + { + Macros::MacroExpansionInfo info; + /// NOTE: it's not recursive + info.expand_special_macros_only = true; + info.table_id = table_id; + /// Avoid unfolding {uuid} macro on this step. + /// We did unfold it in previous versions to make moving table from Atomic to Ordinary database work correctly, + /// but now it's not allowed (and it was the only reason to unfold {uuid} macro). + info.table_id.uuid = UUIDHelpers::Nil; + zookeeper_path = context->getMacros()->expand(zookeeper_path, info); + + info.level = 0; + replica_name = context->getMacros()->expand(replica_name, info); + } + + ast_zk_path->value = zookeeper_path; + ast_replica_name->value = replica_name; + + /// Expand other macros (such as {shard} and {replica}). We do not expand them on previous step + /// to make possible copying metadata files between replicas. + Macros::MacroExpansionInfo info; + info.table_id = table_id; + if (is_replicated_database) + { + auto database = DatabaseCatalog::instance().getDatabase(table_id.database_name); + info.shard = getReplicatedDatabaseShardName(database); + info.replica = getReplicatedDatabaseReplicaName(database); + } + if (!allow_uuid_macro) + info.table_id.uuid = UUIDHelpers::Nil; + zookeeper_path = context->getMacros()->expand(zookeeper_path, info); + + info.level = 0; + info.table_id.uuid = UUIDHelpers::Nil; + replica_name = context->getMacros()->expand(replica_name, info); + + /// We do not allow renaming table with these macros in metadata, because zookeeper_path will be broken after RENAME TABLE. + /// NOTE: it may happen if table was created by older version of ClickHouse (< 20.10) and macros was not unfolded on table creation + /// or if one of these macros is recursively expanded from some other macro. 
+ /// Also do not allow to move table from Atomic to Ordinary database if there's {uuid} macro + if (info.expanded_database || info.expanded_table) + renaming_restrictions = RenamingRestrictions::DO_NOT_ALLOW; + else if (info.expanded_uuid) + renaming_restrictions = RenamingRestrictions::ALLOW_PRESERVING_UUID; + }; + + size_t arg_num = 0; + size_t arg_cnt = engine_args.size(); + + bool has_arguments = (arg_num + 2 <= arg_cnt); + bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); + + if (has_valid_arguments) + { + /// Get path and name from engine arguments + auto * ast_zk_path = engine_args[arg_num]->as(); + if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) + zookeeper_path = ast_zk_path->value.safeGet(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path in ZooKeeper must be a string literal{}", verbose_help_message); + + auto * ast_replica_name = engine_args[arg_num + 1]->as(); + if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) + replica_name = ast_replica_name->value.safeGet(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); + + expand_macro(ast_zk_path, ast_replica_name); + } + else if (is_extended_storage_def + && (arg_cnt == 0 + || !engine_args[arg_num]->as() + || (arg_cnt == 1 && (getNamePart(engine_name) == "Graphite")))) + { + /// Try use default values if arguments are not specified. + /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. + const auto & server_settings = context->getServerSettings(); + zookeeper_path = server_settings.default_replica_path; + /// TODO maybe use hostname if {replica} is not defined? + replica_name = server_settings.default_replica_name; + + /// Modify query, so default values will be written to metadata + assert(arg_num == 0); + ASTs old_args; + std::swap(engine_args, old_args); + auto path_arg = std::make_shared(zookeeper_path); + auto name_arg = std::make_shared(replica_name); + auto * ast_zk_path = path_arg.get(); + auto * ast_replica_name = name_arg.get(); + + expand_macro(ast_zk_path, ast_replica_name); + + engine_args.emplace_back(std::move(path_arg)); + engine_args.emplace_back(std::move(name_arg)); + std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected two string literal arguments: zookeeper_path and replica_name"); +} + +/// Extracts a zookeeper path from a specified CREATE TABLE query. 
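+/// Returns std::nullopt if the query does not describe a Replicated*MergeTree table or if the engine
+/// arguments cannot be evaluated (a BAD_ARGUMENTS error during extraction is logged and swallowed below).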
+std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & query, const ContextPtr & context) +{ + if (!query.storage || !query.storage->engine) + return {}; + + const String & engine_name = query.storage->engine->name; + if (!isReplicated(engine_name)) + return {}; + + StorageID table_id{query.getDatabase(), query.getTable(), query.uuid}; + + ASTs engine_args; + if (query.storage->engine->arguments) + engine_args = query.storage->engine->arguments->children; + for (auto & engine_arg : engine_args) + engine_arg = engine_arg->clone(); + + LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE; + String zookeeper_path; + String replica_name; + RenamingRestrictions renaming_restrictions; + + try + { + extractZooKeeperPathAndReplicaNameFromEngineArgs(query, table_id, engine_name, engine_args, mode, context, + zookeeper_path, replica_name, renaming_restrictions); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + { + tryLogCurrentException(__PRETTY_FUNCTION__, "Couldn't evaluate engine arguments"); + return {}; + } + throw; + } + + return zookeeper_path; +} static StoragePtr create(const StorageFactory::Arguments & args) { @@ -156,17 +391,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) * - Additional MergeTreeSettings in the SETTINGS clause; */ - bool is_extended_storage_def = args.storage_def->isExtendedStorageDefinition() - || (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty()) - || (args.query.columns_list->projections && !args.query.columns_list->projections->children.empty()); + bool is_extended_storage_def = isExtendedStorageDef(args.query); const Settings & local_settings = args.getLocalContext()->getSettingsRef(); - String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree")); - - bool replicated = startsWith(name_part, "Replicated"); - if (replicated) - name_part = name_part.substr(strlen("Replicated")); + bool replicated = isReplicated(args.engine_name); + std::string_view name_part = getNamePart(args.engine_name); MergeTreeData::MergingParams merging_params; merging_params.mode = MergeTreeData::MergingParams::Ordinary; @@ -283,29 +513,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) { /// Allow expressions in engine arguments. /// In new syntax argument can be literal or identifier or array/tuple of identifiers. - size_t arg_idx = 0; - try - { - for (; arg_idx < engine_args.size(); ++arg_idx) - { - auto & arg = engine_args[arg_idx]; - auto * arg_func = arg->as(); - if (!arg_func) - continue; - - /// If we got ASTFunction, let's evaluate it and replace with ASTLiteral. - /// Do not try evaluate array or tuple, because it's array or tuple of column identifiers. - if (arg_func->name == "array" || arg_func->name == "tuple") - continue; - Field value = evaluateConstantExpression(arg, args.getLocalContext()).first; - arg = std::make_shared(value); - } - } - catch (Exception & e) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot evaluate engine argument {}: {} {}", - arg_idx, e.message(), verbose_help_message); - } + evaluateEngineArgs(engine_args, args.getLocalContext()); } else if (args.mode <= LoadingStrictnessLevel::CREATE && !local_settings.allow_deprecated_syntax_for_merge_tree) { @@ -314,130 +522,21 @@ static StoragePtr create(const StorageFactory::Arguments & args) "See also `allow_deprecated_syntax_for_merge_tree` setting."); } - /// For Replicated. 
+ /// Extract zookeeper path and replica name from engine arguments. String zookeeper_path; String replica_name; RenamingRestrictions renaming_restrictions = RenamingRestrictions::ALLOW_ANY; - bool is_on_cluster = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - bool is_replicated_database = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && - DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated"; - - /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries - /// and if UUID was explicitly passed in CREATE TABLE (like for ATTACH) - bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach || args.query.has_uuid; - - auto expand_macro = [&] (ASTLiteral * ast_zk_path, ASTLiteral * ast_replica_name) - { - /// Unfold {database} and {table} macro on table creation, so table can be renamed. - if (args.mode < LoadingStrictnessLevel::ATTACH) - { - Macros::MacroExpansionInfo info; - /// NOTE: it's not recursive - info.expand_special_macros_only = true; - info.table_id = args.table_id; - /// Avoid unfolding {uuid} macro on this step. - /// We did unfold it in previous versions to make moving table from Atomic to Ordinary database work correctly, - /// but now it's not allowed (and it was the only reason to unfold {uuid} macro). - info.table_id.uuid = UUIDHelpers::Nil; - zookeeper_path = context->getMacros()->expand(zookeeper_path, info); - - info.level = 0; - replica_name = context->getMacros()->expand(replica_name, info); - } - - ast_zk_path->value = zookeeper_path; - ast_replica_name->value = replica_name; - - /// Expand other macros (such as {shard} and {replica}). We do not expand them on previous step - /// to make possible copying metadata files between replicas. - Macros::MacroExpansionInfo info; - info.table_id = args.table_id; - if (is_replicated_database) - { - auto database = DatabaseCatalog::instance().getDatabase(args.table_id.database_name); - info.shard = getReplicatedDatabaseShardName(database); - info.replica = getReplicatedDatabaseReplicaName(database); - } - if (!allow_uuid_macro) - info.table_id.uuid = UUIDHelpers::Nil; - zookeeper_path = context->getMacros()->expand(zookeeper_path, info); - - info.level = 0; - info.table_id.uuid = UUIDHelpers::Nil; - replica_name = context->getMacros()->expand(replica_name, info); - - /// We do not allow renaming table with these macros in metadata, because zookeeper_path will be broken after RENAME TABLE. - /// NOTE: it may happen if table was created by older version of ClickHouse (< 20.10) and macros was not unfolded on table creation - /// or if one of these macros is recursively expanded from some other macro. 
- /// Also do not allow to move table from Atomic to Ordinary database if there's {uuid} macro - if (info.expanded_database || info.expanded_table) - renaming_restrictions = RenamingRestrictions::DO_NOT_ALLOW; - else if (info.expanded_uuid) - renaming_restrictions = RenamingRestrictions::ALLOW_PRESERVING_UUID; - }; - if (replicated) { - bool has_arguments = arg_num + 2 <= arg_cnt; - bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); + extractZooKeeperPathAndReplicaNameFromEngineArgs(args.query, args.table_id, args.engine_name, args.engine_args, args.mode, + args.getLocalContext(), zookeeper_path, replica_name, renaming_restrictions); - ASTLiteral * ast_zk_path; - ASTLiteral * ast_replica_name; + if (replica_name.empty()) + throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); - if (has_valid_arguments) - { - /// Get path and name from engine arguments - ast_zk_path = engine_args[arg_num]->as(); - if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) - zookeeper_path = ast_zk_path->value.safeGet(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path in ZooKeeper must be a string literal{}", verbose_help_message); - ++arg_num; - - ast_replica_name = engine_args[arg_num]->as(); - if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) - replica_name = ast_replica_name->value.safeGet(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); - - if (replica_name.empty()) - throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); - ++arg_num; - - expand_macro(ast_zk_path, ast_replica_name); - } - else if (is_extended_storage_def - && (arg_cnt == 0 - || !engine_args[arg_num]->as() - || (arg_cnt == 1 && merging_params.mode == MergeTreeData::MergingParams::Graphite))) - { - /// Try use default values if arguments are not specified. - /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. - const auto & server_settings = args.getContext()->getServerSettings(); - zookeeper_path = server_settings.default_replica_path; - /// TODO maybe use hostname if {replica} is not defined? - replica_name = server_settings.default_replica_name; - - /// Modify query, so default values will be written to metadata - assert(arg_num == 0); - ASTs old_args; - std::swap(engine_args, old_args); - auto path_arg = std::make_shared(zookeeper_path); - auto name_arg = std::make_shared(replica_name); - ast_zk_path = path_arg.get(); - ast_replica_name = name_arg.get(); - - expand_macro(ast_zk_path, ast_replica_name); - - engine_args.emplace_back(std::move(path_arg)); - engine_args.emplace_back(std::move(name_arg)); - std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); - arg_num = 2; - arg_cnt += 2; - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected two string literal arguments: zookeeper_path and replica_name"); + arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments. + arg_num = 2; /// zookeeper_path and replica_name together are always two arguments. } /// This merging param maybe used as part of sorting key @@ -567,8 +666,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. 
metadata.sorting_key = KeyDescription::getSortingKeyFromAST( args.storage_def->order_by->ptr(), metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// If primary key explicitly defined, than get it from AST if (args.storage_def->primary_key) @@ -681,8 +780,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. metadata.sorting_key = KeyDescription::getSortingKeyFromAST(engine_args[arg_num], metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// In old syntax primary_key always equals to sorting key. metadata.primary_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, context); diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index c1e744e8d79..47b69d79ad8 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -1,6 +1,7 @@ #include "NamedCollectionsHelpers.h" #include #include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp new file mode 100644 index 00000000000..ada3e2e9323 --- /dev/null +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -0,0 +1,551 @@ +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +const std::unordered_set required_configuration_keys = { + "blob_path", + "container", +}; + +const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "structure", + "compression_method", + "account_name", + "account_key", + "connection_string", + "storage_account_url", +}; + +using AzureClient = Azure::Storage::Blobs::BlobContainerClient; +using AzureClientPtr = std::unique_ptr; + +namespace +{ + bool isConnectionString(const std::string & candidate) + { + return !candidate.starts_with("http"); + } + + template + bool containerExists(T & blob_service_client, const std::string & container_name) + { + Azure::Storage::Blobs::ListBlobContainersOptions options; + options.Prefix = container_name; + options.PageSizeHint = 1; + + auto containers_list_response = blob_service_client.ListBlobContainers(options); + auto containers_list = containers_list_response.BlobContainers; + + auto it = std::find_if( + containers_list.begin(), containers_list.end(), + [&](const auto & c) { return c.Name == container_name; }); + return it != containers_list.end(); + } +} + +Poco::URI StorageAzureConfiguration::getConnectionURL() const +{ + if (!is_connection_string) + return Poco::URI(connection_url); + + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); + return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); +} + +void StorageAzureConfiguration::check(ContextPtr context) const +{ + 
context->getGlobalContext()->getRemoteHostFilter().checkURL(getConnectionURL()); + Configuration::check(context); +} + +StorageAzureConfiguration::StorageAzureConfiguration(const StorageAzureConfiguration & other) + : Configuration(other) +{ + connection_url = other.connection_url; + is_connection_string = other.is_connection_string; + account_name = other.account_name; + account_key = other.account_key; + container = other.container; + blob_path = other.blob_path; + blobs_paths = other.blobs_paths; +} + +AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(ContextPtr context) +{ + const auto & context_settings = context->getSettingsRef(); + auto settings_ptr = std::make_unique(); + settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); + settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; + settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; + settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; + settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; + return settings_ptr; +} + +StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.azure_truncate_on_insert, + .create_new_file_on_insert = settings.azure_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.azure_skip_empty_files, + .list_object_keys_size = settings.azure_list_object_keys_size, + .throw_on_zero_files_match = settings.azure_throw_on_zero_files_match, + .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, + }; +} + +ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +{ + assertInitialized(); + auto client = createClient(is_readonly, /* attempt_to_create_container */true); + auto settings = createSettings(context); + return std::make_unique( + "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); +} + +AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) +{ + using namespace Azure::Storage::Blobs; + + AzureClientPtr result; + + if (is_connection_string) + { + auto managed_identity_credential = std::make_shared(); + auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); + + if (attempt_to_create_container) + { + bool container_exists = containerExists(*blob_service_client, container); + if (!container_exists) + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + + try + { + result->CreateIfNotExists(); + } + catch (const Azure::Storage::StorageException & e) + { + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already 
exists.")) + { + throw; + } + } + } + } + } + else + { + std::shared_ptr storage_shared_key_credential; + if (account_name.has_value() && account_key.has_value()) + { + storage_shared_key_credential + = std::make_shared(*account_name, *account_key); + } + + std::unique_ptr blob_service_client; + std::shared_ptr managed_identity_credential; + if (storage_shared_key_credential) + { + blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); + } + else + { + managed_identity_credential = std::make_shared(); + blob_service_client = std::make_unique(connection_url, managed_identity_credential); + } + + std::string final_url; + size_t pos = connection_url.find('?'); + if (pos != std::string::npos) + { + auto url_without_sas = connection_url.substr(0, pos); + final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container + + connection_url.substr(pos); + } + else + final_url + = connection_url + (connection_url.back() == '/' ? "" : "/") + container; + + if (!attempt_to_create_container) + { + if (storage_shared_key_credential) + return std::make_unique(final_url, storage_shared_key_credential); + else + return std::make_unique(final_url, managed_identity_credential); + } + + bool container_exists = containerExists(*blob_service_client, container); + if (container_exists) + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url, managed_identity_credential); + } + else + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + try + { + result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); + } catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.") + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url, managed_identity_credential); + } + else + { + throw; + } + } + } + } + + return result; +} + +void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection) +{ + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + + if (collection.has("connection_string")) + { + connection_url = collection.get("connection_string"); + is_connection_string = true; + } + + if (collection.has("storage_account_url")) + { + connection_url = collection.get("storage_account_url"); + is_connection_string = false; + } + + container = collection.get("container"); + blob_path = collection.get("blob_path"); + + if (collection.has("account_name")) + account_name = collection.get("account_name"); + + if (collection.has("account_key")) + account_key = collection.get("account_key"); + + structure = collection.getOrDefault("structure", "auto"); + format = collection.getOrDefault("format", format); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + + blobs_paths = {blob_path}; +} + +void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) +{ + if (engine_args.size() < 3 || engine_args.size() > (with_structure ? 
8 : 7)) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage AzureBlobStorage requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure)])"); + } + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); + + std::unordered_map engine_args_to_idx; + + connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + is_connection_string = isConnectionString(connection_url); + + container = checkAndGetLiteralArgument(engine_args[1], "container"); + blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + + auto is_format_arg = [] (const std::string & s) -> bool + { + return s == "auto" || FormatFactory::instance().getAllFormats().contains(Poco::toLower(s)); + }; + + if (engine_args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + } + else + { + if (with_structure) + structure = fourth_arg; + else + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unknown format or account name specified without account key: {}", fourth_arg); + } + } + else if (engine_args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + } + } + else if (engine_args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + if (with_structure) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + structure = checkAndGetLiteralArgument(engine_args[5], "structure"); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (is_format_arg(sixth_arg)) + format = sixth_arg; + else + { + if (with_structure) + structure = sixth_arg; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + } + } + } + else if (engine_args.size() == 7) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (!with_structure && is_format_arg(fourth_arg)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + } + } + else if (with_structure && engine_args.size() == 8) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], 
"account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + structure = checkAndGetLiteralArgument(engine_args[7], "structure"); + } + + blobs_paths = {blob_path}; +} + +void StorageAzureConfiguration::addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + if (args.size() < 3 || args.size() > 8) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "StorageObjectStorage(connection_string|storage_account_url, container_name, " + "blobpath, [account_name, account_key, format, compression, structure])"); + } + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + auto structure_literal = std::make_shared(structure_); + auto format_literal = std::make_shared(format_); + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + /// (connection_string, container_name, blobpath) + if (args.size() == 3) + { + args.push_back(format_literal); + /// Add compression = "auto" before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (connection_string, container_name, blobpath, structure) or + /// (connection_string, container_name, blobpath, format) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. + else if (args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + /// (..., format) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., structure) -> (..., format, compression, structure) + else + { + auto structure_arg = args.back(); + args[3] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); + } + } + /// (connection_string, container_name, blobpath, format, compression) or + /// (storage_account_url, container_name, blobpath, account_name, account_key) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. 
+ else if (args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + /// (..., format, compression) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + args.push_back(structure_literal); + } + /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) + else + { + args.push_back(format_literal); + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + } + /// (connection_string, container_name, blobpath, format, compression, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, format) + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); + + /// (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[5], "structure") == "auto") + args[5] = structure_literal; + } + /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) + else if (is_format_arg(sixth_arg)) + { + if (sixth_arg == "auto") + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) + else + { + auto structure_arg = args.back(); + args[5] = format_literal; + /// Add compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + if (sixth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); + } + } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) + else if (args.size() == 7) + { + /// (..., format, compression) -> (..., format, compression, structure) + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + args.push_back(structure_literal); + } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + else if (args.size() == 8) + { + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + if (checkAndGetLiteralArgument(args[7], "structure") == "auto") + args[7] = structure_literal; + } + } +} + +} + +#endif diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h new file mode 100644 index 00000000000..35b19079ca9 --- /dev/null +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -0,0 +1,77 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE +#include +#include +#include + +namespace DB +{ +class BackupFactory; + +class StorageAzureConfiguration : public StorageObjectStorage::Configuration +{ + friend class BackupReaderAzureBlobStorage; + friend class BackupWriterAzureBlobStorage; + friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); + +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto type_name = "azure"; + static constexpr auto engine_name = "Azure"; + + StorageAzureConfiguration() = default; + StorageAzureConfiguration(const StorageAzureConfiguration & other); + + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + + Path getPath() const override { return blob_path; } + void setPath(const Path & path) override { blob_path = path; } + + const Paths & getPaths() const override { return blobs_paths; } + void setPaths(const Paths & paths) override { blobs_paths = paths; } + + String getNamespace() const override { return container; } + String getDataSourceDescription() const override { return std::filesystem::path(connection_url) / container; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; + + void check(ContextPtr context) const override; + ConfigurationPtr clone() override { return std::make_shared(*this); } + + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; + +protected: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + + using AzureClient = Azure::Storage::Blobs::BlobContainerClient; + using AzureClientPtr = std::unique_ptr; + + std::string connection_url; + bool is_connection_string; + + std::optional account_name; + std::optional account_key; + + std::string container; + std::string blob_path; + std::vector blobs_paths; + + AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); + AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); + Poco::URI getConnectionURL() const; +}; + +} + +#endif diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp 
b/src/Storages/ObjectStorage/DataLakes/Common.cpp new file mode 100644 index 00000000000..4830cc52a90 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -0,0 +1,28 @@ +#include "Common.h" +#include +#include +#include + +namespace DB +{ + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const String & prefix, const String & suffix) +{ + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage.listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files ({})", res.size(), fmt::join(res, ", ")); + return res; +} + +} diff --git a/src/Storages/ObjectStorage/DataLakes/Common.h b/src/Storages/ObjectStorage/DataLakes/Common.h new file mode 100644 index 00000000000..db3afa9e4a6 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.h @@ -0,0 +1,15 @@ +#pragma once +#include +#include + +namespace DB +{ + +class IObjectStorage; + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const String & prefix, const String & suffix); + +} diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp similarity index 77% rename from src/Storages/DataLakes/DeltaLakeMetadataParser.cpp rename to src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 14a912a180d..38bf3112ee2 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -1,11 +1,9 @@ -#include +#include #include #include "config.h" #include #if USE_AWS_S3 && USE_PARQUET -#include -#include #include #include #include @@ -13,12 +11,12 @@ #include #include #include +#include +#include #include #include #include -#include - -namespace fs = std::filesystem; +#include namespace DB { @@ -29,13 +27,24 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -template -struct DeltaLakeMetadataParser::Impl +struct DeltaLakeMetadata::Impl { + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + ContextPtr context; + /** * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ + Impl(ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : object_storage(object_storage_) + , configuration(configuration_) + , context(context_) + { + } /** * DeltaLake tables store metadata files and data files. @@ -65,10 +74,10 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. 
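 * Illustrative action lines (handled by processMetadataFile() below):
 *   {"add": {"path": "<data file path>", ...}}
 *   {"remove": {"path": "<data file path>", ...}}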
*/ - std::set processMetadataFiles(const Configuration & configuration, ContextPtr context) + std::set processMetadataFiles() { std::set result_files; - const auto checkpoint_version = getCheckpointIfExists(result_files, configuration, context); + const auto checkpoint_version = getCheckpointIfExists(result_files); if (checkpoint_version) { @@ -76,12 +85,12 @@ struct DeltaLakeMetadataParser::Impl while (true) { const auto filename = withPadding(++current_version) + metadata_file_suffix; - const auto file_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / filename; + const auto file_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / filename; - if (!MetadataReadHelper::exists(file_path, configuration)) + if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files, configuration, context); + processMetadataFile(file_path, result_files); } LOG_TRACE( @@ -90,11 +99,9 @@ struct DeltaLakeMetadataParser::Impl } else { - const auto keys = MetadataReadHelper::listFiles( - configuration, deltalake_metadata_directory, metadata_file_suffix); - + const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, configuration, context); + processMetadataFile(key, result_files); } return result_files; @@ -129,13 +136,10 @@ struct DeltaLakeMetadataParser::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile( - const String & key, - std::set & result, - const Configuration & configuration, - ContextPtr context) + void processMetadataFile(const String & key, std::set & result) const { - auto buf = MetadataReadHelper::createReadBuffer(key, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; while (!buf->eof()) @@ -157,12 +161,12 @@ struct DeltaLakeMetadataParser::Impl if (json.has("add")) { const auto path = json["add"]["path"].getString(); - result.insert(fs::path(configuration.getPath()) / path); + result.insert(std::filesystem::path(configuration->getPath()) / path); } else if (json.has("remove")) { const auto path = json["remove"]["path"].getString(); - result.erase(fs::path(configuration.getPath()) / path); + result.erase(std::filesystem::path(configuration->getPath()) / path); } } } @@ -180,14 +184,15 @@ struct DeltaLakeMetadataParser::Impl * * We need to get "version", which is the version of the checkpoint we need to read. 
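 * (Illustrative contents: {"version": 20, ...}; only the "version" field is read here.)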
*/ - size_t readLastCheckpointIfExists(const Configuration & configuration, ContextPtr context) + size_t readLastCheckpointIfExists() const { - const auto last_checkpoint_file = fs::path(configuration.getPath()) / deltalake_metadata_directory / "_last_checkpoint"; - if (!MetadataReadHelper::exists(last_checkpoint_file, configuration)) + const auto last_checkpoint_file = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; + if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; String json_str; - auto buf = MetadataReadHelper::createReadBuffer(last_checkpoint_file, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); const JSON json(json_str); @@ -237,18 +242,19 @@ struct DeltaLakeMetadataParser::Impl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result, const Configuration & configuration, ContextPtr context) + size_t getCheckpointIfExists(std::set & result) { - const auto version = readLastCheckpointIfExists(configuration, context); + const auto version = readLastCheckpointIfExists(); if (!version) return 0; const auto checkpoint_filename = withPadding(version) + ".checkpoint.parquet"; - const auto checkpoint_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / checkpoint_filename; + const auto checkpoint_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); - auto buf = MetadataReadHelper::createReadBuffer(checkpoint_path, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto format_settings = getFormatSettings(context); /// Force nullable, because this parquet file for some reason does not have nullable @@ -263,13 +269,12 @@ struct DeltaLakeMetadataParser::Impl header.insert({column.type->createColumn(), column.type, column.name}); std::atomic is_stopped{0}; - auto arrow_file = asArrowFile(*buf, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES); std::unique_ptr reader; THROW_ARROW_NOT_OK( parquet::arrow::OpenFile( asArrowFile(*buf, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES), - arrow::default_memory_pool(), + ArrowMemoryPool::instance(), &reader)); std::shared_ptr schema; @@ -305,7 +310,7 @@ struct DeltaLakeMetadataParser::Impl if (filename.empty()) continue; LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(fs::path(configuration.getPath()) / filename); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); if (!inserted) throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); } @@ -316,22 +321,24 @@ struct DeltaLakeMetadataParser::Impl LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; - -template -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) +DeltaLakeMetadata::DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : impl(std::make_unique(object_storage_, configuration_, context_)) { } -template -Strings DeltaLakeMetadataParser::getFiles(const Configuration & configuration, 
ContextPtr context) +Strings DeltaLakeMetadata::getDataFiles() const { - auto result = impl->processMetadataFiles(configuration, context); - return Strings(result.begin(), result.end()); + if (!data_files.empty()) + return data_files; + + auto result = impl->processMetadataFiles(); + data_files = Strings(result.begin(), result.end()); + return data_files; } -template DeltaLakeMetadataParser::DeltaLakeMetadataParser(); -template Strings DeltaLakeMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); } #endif diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h new file mode 100644 index 00000000000..e527721b29e --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class DeltaLakeMetadata final : public IDataLakeMetadata +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto name = "DeltaLake"; + + DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * deltalake_metadata = dynamic_cast(&other); + return deltalake_metadata + && !data_files.empty() && !deltalake_metadata->data_files.empty() + && data_files == deltalake_metadata->data_files; + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + struct Impl; + const std::shared_ptr impl; + mutable Strings data_files; +}; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp new file mode 100644 index 00000000000..91a586ccbf9 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#include +#include "config.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** + * Useful links: + * - https://hudi.apache.org/tech-specs/ + * - https://hudi.apache.org/docs/file_layouts/ + */ + +/** + * Hudi tables store metadata files and data files. + * Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg, + * metadata is not required in order to understand which files we need to read, moreover, + * for Hudi metadata does not always exist. + * + * There can be two types of data files + * 1. base files (columnar file formats like Apache Parquet/Orc) + * 2. log files + * Currently we support reading only `base files`. + * Data file name format: + * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension] + * + * To find needed parts we need to find out latest part file for every file group for every partition. + * Explanation why: + * Hudi reads in and overwrites the entire table/partition with each update. + * Hudi controls the number of file groups under a single partition according to the + * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. + * Each file group is identified by File Id. 
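+ * Illustrative example: if a partition contains "fid1_tok_100.parquet" and "fid1_tok_200.parquet",
+ * only the latter (greater transaction timestamp for the same File Id) is returned by getDataFilesImpl().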
+ */ +Strings HudiMetadata::getDataFilesImpl() const +{ + auto log = getLogger("HudiMetadata"); + const auto keys = listFiles(*object_storage, *configuration, "", Poco::toLower(configuration->format)); + + using Partition = std::string; + using FileID = std::string; + struct FileInfo + { + String key; + UInt64 timestamp = 0; + }; + std::unordered_map> files; + + for (const auto & key : keys) + { + auto key_file = std::filesystem::path(key); + Strings file_parts; + const String stem = key_file.stem(); + splitInto<'_'>(file_parts, stem); + if (file_parts.size() != 3) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); + + const auto partition = key_file.parent_path().stem(); + const auto & file_id = file_parts[0]; + const auto timestamp = parse(file_parts[2]); + + auto & file_info = files[partition][file_id]; + if (file_info.timestamp == 0 || file_info.timestamp < timestamp) + { + file_info.key = key; + file_info.timestamp = timestamp; + } + } + + Strings result; + for (auto & [partition, partition_data] : files) + { + LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); + for (auto & [file_id, file_data] : partition_data) + result.push_back(std::move(file_data.key)); + } + return result; +} + +HudiMetadata::HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) +{ +} + +Strings HudiMetadata::getDataFiles() const +{ + if (data_files.empty()) + data_files = getDataFilesImpl(); + return data_files; +} + +} diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h new file mode 100644 index 00000000000..3ab274b1fbf --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class HudiMetadata final : public IDataLakeMetadata, private WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto name = "Hudi"; + + HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * hudi_metadata = dynamic_cast(&other); + return hudi_metadata + && !data_files.empty() && !hudi_metadata->data_files.empty() + && data_files == hudi_metadata->data_files; + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + mutable Strings data_files; + + Strings getDataFilesImpl() const; +}; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h new file mode 100644 index 00000000000..a2bd5adb947 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IDataLakeMetadata : boost::noncopyable +{ +public: + virtual ~IDataLakeMetadata() = default; + virtual Strings getDataFiles() const = 
0; + virtual NamesAndTypesList getTableSchema() const = 0; + virtual bool operator==(const IDataLakeMetadata & other) const = 0; +}; +using DataLakeMetadataPtr = std::unique_ptr; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h new file mode 100644 index 00000000000..83865c47eb8 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -0,0 +1,139 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 && USE_AVRO + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +/// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) +/// Right now it's implemented on top of StorageS3 and right now it doesn't support +/// many Iceberg features like schema evolution, partitioning, positional and equality deletes. +template +class IStorageDataLake final : public StorageObjectStorage +{ +public: + using Storage = StorageObjectStorage; + using ConfigurationPtr = Storage::ConfigurationPtr; + + static StoragePtr create( + ConfigurationPtr base_configuration, + ContextPtr context, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment_, + std::optional format_settings_, + LoadingStrictnessLevel mode) + { + auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); + DataLakeMetadataPtr metadata; + NamesAndTypesList schema_from_metadata; + + if (base_configuration->format == "auto") + base_configuration->format = "Parquet"; + + ConfigurationPtr configuration = base_configuration->clone(); + + try + { + metadata = DataLakeMetadata::create(object_storage, base_configuration, context); + schema_from_metadata = metadata->getTableSchema(); + configuration->setPaths(metadata->getDataFiles()); + } + catch (...) + { + if (mode <= LoadingStrictnessLevel::CREATE) + throw; + + metadata.reset(); + configuration->setPaths({}); + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + return std::make_shared>( + base_configuration, std::move(metadata), configuration, object_storage, + context, table_id_, + columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, + constraints_, comment_, format_settings_); + } + + String getName() const override { return DataLakeMetadata::name; } + + static ColumnsDescription getTableStructureFromData( + ObjectStoragePtr object_storage_, + ConfigurationPtr base_configuration, + const std::optional & format_settings_, + ContextPtr local_context) + { + auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); + + auto schema_from_metadata = metadata->getTableSchema(); + if (schema_from_metadata != NamesAndTypesList{}) + { + return ColumnsDescription(std::move(schema_from_metadata)); + } + else + { + ConfigurationPtr configuration = base_configuration->clone(); + configuration->setPaths(metadata->getDataFiles()); + return Storage::resolveSchemaFromData( + object_storage_, configuration, format_settings_, local_context); + } + } + + void updateConfiguration(ContextPtr local_context) override + { + Storage::updateConfiguration(local_context); + + auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + + if (current_metadata && *current_metadata == *new_metadata) + return; + + current_metadata = std::move(new_metadata); + auto updated_configuration = base_configuration->clone(); + updated_configuration->setPaths(current_metadata->getDataFiles()); + + Storage::configuration = updated_configuration; + } + + template + IStorageDataLake( + ConfigurationPtr base_configuration_, + DataLakeMetadataPtr metadata_, + Args &&... args) + : Storage(std::forward(args)...) + , base_configuration(base_configuration_) + , current_metadata(std::move(metadata_)) + { + if (base_configuration->format == "auto") + { + base_configuration->format = Storage::configuration->format; + } + } + +private: + ConfigurationPtr base_configuration; + DataLakeMetadataPtr current_metadata; +}; + +using StorageIceberg = IStorageDataLake; +using StorageDeltaLake = IStorageDataLake; +using StorageHudi = IStorageDataLake; + +} + +#endif diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp similarity index 93% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index df1536f53fc..0484f86542c 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -21,30 +21,32 @@ #include #include #include +#include #include #include -#include -#include -#include +#include +#include #include #include #include +#include namespace DB { namespace ErrorCodes { - extern const int FILE_DOESNT_EXIST; - extern const int ILLEGAL_COLUMN; - extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; +extern const int FILE_DOESNT_EXIST; +extern const int ILLEGAL_COLUMN; +extern const int BAD_ARGUMENTS; +extern const int UNSUPPORTED_METHOD; } IcebergMetadata::IcebergMetadata( - const StorageS3::Configuration & configuration_, + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, DB::ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -52,6 +54,7 @@ IcebergMetadata::IcebergMetadata( Int32 current_schema_id_, DB::NamesAndTypesList schema_) : WithContext(context_) + , object_storage(object_storage_) , configuration(configuration_) , metadata_version(metadata_version_) , format_version(format_version_) @@ -65,14 +68,14 @@ IcebergMetadata::IcebergMetadata( namespace { -enum class 
ManifestEntryStatus +enum class ManifestEntryStatus : uint8_t { EXISTING = 0, ADDED = 1, DELETED = 2, }; -enum class DataFileContent +enum class DataFileContent : uint8_t { DATA = 0, POSITION_DELETES = 1, @@ -337,15 +340,17 @@ MutableColumns parseAvro( * 1) v.metadata.json, where V - metadata version. * 2) -.metadata.json, where V - metadata version */ -std::pair getMetadataFileAndVersion(const StorageS3::Configuration & configuration) +std::pair getMetadataFileAndVersion( + ObjectStoragePtr object_storage, + const StorageObjectStorage::Configuration & configuration) { - const auto metadata_files = S3DataLakeMetadataReadHelper::listFiles(configuration, "metadata", ".metadata.json"); + const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) { throw Exception( ErrorCodes::FILE_DOESNT_EXIST, "The metadata file for Iceberg table with path {} doesn't exist", - configuration.url.key); + configuration.getPath()); } std::vector> metadata_files_with_versions; @@ -372,11 +377,15 @@ std::pair getMetadataFileAndVersion(const StorageS3::Configuratio } -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context_) +DataLakeMetadataPtr IcebergMetadata::create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) { - const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(configuration); + const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); - auto buf = S3DataLakeMetadataReadHelper::createReadBuffer(metadata_file_path, context_, configuration); + auto read_settings = local_context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -385,7 +394,7 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura Poco::JSON::Object::Ptr object = json.extract(); auto format_version = object->getValue("format-version"); - auto [schema, schema_id] = parseTableSchema(object, format_version, context_->getSettingsRef().iceberg_engine_ignore_schema_evolution); + auto [schema, schema_id] = parseTableSchema(object, format_version, local_context->getSettingsRef().iceberg_engine_ignore_schema_evolution); auto current_snapshot_id = object->getValue("current-snapshot-id"); auto snapshots = object->get("snapshots").extract(); @@ -397,12 +406,12 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura if (snapshot->getValue("snapshot-id") == current_snapshot_id) { const auto path = snapshot->getValue("manifest-list"); - manifest_list_file = std::filesystem::path(configuration.url.key) / "metadata" / std::filesystem::path(path).filename(); + manifest_list_file = std::filesystem::path(configuration->getPath()) / "metadata" / std::filesystem::path(path).filename(); break; } } - return std::make_unique(configuration, context_, metadata_version, format_version, manifest_list_file, schema_id, schema); + return std::make_unique(object_storage, configuration, local_context, metadata_version, format_version, manifest_list_file, schema_id, schema); } /** @@ -430,7 +439,7 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura * │ 1 │ 2252246380142525104 │ 
('/iceberg_data/db/table_name/data/a=2/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00003.parquet','PARQUET',(2),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'3')],[(1,'\0\0\0\0\0\0\0'),(2,'3')],NULL,[4],0) │ * └────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ */ -Strings IcebergMetadata::getDataFiles() +Strings IcebergMetadata::getDataFiles() const { if (!data_files.empty()) return data_files; @@ -441,12 +450,14 @@ Strings IcebergMetadata::getDataFiles() LOG_TEST(log, "Collect manifest files from manifest list {}", manifest_list_file); - auto manifest_list_buf = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_list_file, getContext(), configuration); + auto context = getContext(); + auto read_settings = context->getReadSettings(); + auto manifest_list_buf = object_storage->readObject(StoredObject(manifest_list_file), read_settings); auto manifest_list_file_reader = std::make_unique(std::make_unique(*manifest_list_buf)); auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0)); Block header{{data_type->createColumn(), data_type, "manifest_path"}}; - auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(getContext())); + auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(context)); auto & col = columns.at(0); if (col->getDataType() != TypeIndex::String) @@ -462,7 +473,7 @@ Strings IcebergMetadata::getDataFiles() { const auto file_path = col_str->getDataAt(i).toView(); const auto filename = std::filesystem::path(file_path).filename(); - manifest_files.emplace_back(std::filesystem::path(configuration.url.key) / "metadata" / filename); + manifest_files.emplace_back(std::filesystem::path(configuration->getPath()) / "metadata" / filename); } NameSet files; @@ -471,7 +482,7 @@ Strings IcebergMetadata::getDataFiles() { LOG_TEST(log, "Process manifest file {}", manifest_file); - auto buffer = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_file, getContext(), configuration); + auto buffer = object_storage->readObject(StoredObject(manifest_file), read_settings); auto manifest_file_reader = std::make_unique(std::make_unique(*buffer)); /// Manifest file should always have table schema in avro file metadata. 
By now we don't support tables with evolved schema, @@ -482,7 +493,7 @@ Strings IcebergMetadata::getDataFiles() Poco::JSON::Parser parser; Poco::Dynamic::Var json = parser.parse(schema_json_string); Poco::JSON::Object::Ptr schema_object = json.extract(); - if (!getContext()->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) + if (!context->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " @@ -595,9 +606,9 @@ Strings IcebergMetadata::getDataFiles() const auto status = status_int_column->getInt(i); const auto data_path = std::string(file_path_string_column->getDataAt(i).toView()); - const auto pos = data_path.find(configuration.url.key); + const auto pos = data_path.find(configuration->getPath()); if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration->getPath(), data_path); const auto file_path = data_path.substr(pos); diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h similarity index 69% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.h rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 3e6a2ec3415..06dbd373bf9 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -2,9 +2,11 @@ #if USE_AWS_S3 && USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. -#include #include #include +#include +#include +#include namespace DB { @@ -56,40 +58,55 @@ namespace DB * "metadata-log" : [ ] * } */ -class IcebergMetadata : WithContext +class IcebergMetadata : public IDataLakeMetadata, private WithContext { public: - IcebergMetadata(const StorageS3::Configuration & configuration_, - ContextPtr context_, - Int32 metadata_version_, - Int32 format_version_, - String manifest_list_file_, - Int32 current_schema_id_, - NamesAndTypesList schema_); + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto name = "Iceberg"; + + IcebergMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_, + Int32 metadata_version_, + Int32 format_version_, + String manifest_list_file_, + Int32 current_schema_id_, + NamesAndTypesList schema_); /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files. /// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file) - Strings getDataFiles(); + Strings getDataFiles() const override; /// Get table schema parsed from metadata. 
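The lazy caching described for getDataFiles() above is the same shape DeltaLakeMetadata and HudiMetadata use. A minimal standalone sketch of the pattern (placeholder names, not the real class):

#include <string>
#include <vector>

using Strings = std::vector<std::string>;

struct LazyDataFiles
{
    Strings getDataFiles() const
    {
        if (data_files.empty())
            data_files = loadFromMetadata();    /// expensive: would read remote metadata files
        return data_files;
    }

    Strings loadFromMetadata() const { return {"part-0.parquet"}; }    /// placeholder for the real lookup

    mutable Strings data_files;    /// mutable so the const accessor can cache its result
};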
- NamesAndTypesList getTableSchema() const { return schema; } + NamesAndTypesList getTableSchema() const override { return schema; } - size_t getVersion() const { return metadata_version; } + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * iceberg_metadata = dynamic_cast(&other); + return iceberg_metadata && getVersion() == iceberg_metadata->getVersion(); + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context); private: - const StorageS3::Configuration configuration; + size_t getVersion() const { return metadata_version; } + + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; Int32 metadata_version; Int32 format_version; String manifest_list_file; Int32 current_schema_id; NamesAndTypesList schema; - Strings data_files; + mutable Strings data_files; LoggerPtr log; - }; -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context); - } #endif diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp new file mode 100644 index 00000000000..0fa6402e892 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp @@ -0,0 +1,82 @@ +#include "config.h" + +#if USE_AWS_S3 + +#include +#include +#include +#include + + +namespace DB +{ + +#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. + +void registerStorageIceberg(StorageFactory & factory) +{ + factory.registerStorage( + "Iceberg", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageIceberg::create( + configuration, args.getContext(), args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.mode); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +#endif + +#if USE_PARQUET +void registerStorageDeltaLake(StorageFactory & factory) +{ + factory.registerStorage( + "DeltaLake", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageDeltaLake::create( + configuration, args.getContext(), args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.mode); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} +#endif + +void registerStorageHudi(StorageFactory & factory) +{ + factory.registerStorage( + "Hudi", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageHudi::create( + configuration, args.getContext(), args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.mode); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +} + +#endif diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp similarity index 99% rename from 
src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp index 6b6151f5474..21df7e35284 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -1,9 +1,9 @@ #include "AsynchronousReadBufferFromHDFS.h" #if USE_HDFS +#include "ReadBufferFromHDFS.h" #include #include -#include #include #include diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h similarity index 96% rename from src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h index 10e2749fd4a..5aef92315a4 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp new file mode 100644 index 00000000000..a8a9ab5b557 --- /dev/null +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -0,0 +1,217 @@ +#include + +#if USE_HDFS +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) + : Configuration(other) +{ + url = other.url; + path = other.path; + paths = other.paths; +} + +void StorageHDFSConfiguration::check(ContextPtr context) const +{ + context->getRemoteHostFilter().checkURL(Poco::URI(url)); + checkHDFSURL(fs::path(url) / path.substr(1)); + Configuration::check(context); +} + +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT + ContextPtr context, + bool /* is_readonly */) +{ + assertInitialized(); + const auto & settings = context->getSettingsRef(); + auto hdfs_settings = std::make_unique( + settings.remote_read_min_bytes_for_seek, + settings.hdfs_replication + ); + return std::make_shared( + url, std::move(hdfs_settings), context->getConfigRef(), /* lazy_initialize */true); +} + +std::string StorageHDFSConfiguration::getPathWithoutGlobs() const +{ + /// Unlike s3 and azure, which are object storages, + /// hdfs is a filesystem, so it cannot list files by partual prefix, + /// only by directory. + auto first_glob_pos = path.find_first_of("*?{"); + auto end_of_path_without_globs = path.substr(0, first_glob_pos).rfind('/'); + if (end_of_path_without_globs == std::string::npos || end_of_path_without_globs == 0) + return "/"; + return path.substr(0, end_of_path_without_globs); +} +StorageObjectStorage::QuerySettings StorageHDFSConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.hdfs_truncate_on_insert, + .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.hdfs_skip_empty_files, + .list_object_keys_size = 0, /// HDFS does not support listing in batches. 
+ .throw_on_zero_files_match = settings.hdfs_throw_on_zero_files_match, + .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, + }; +} + +void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) +{ + const size_t max_args_num = with_structure ? 4 : 3; + if (args.empty() || args.size() > max_args_num) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected not more than {} arguments", max_args_num); + } + + std::string url_str; + url_str = checkAndGetLiteralArgument(args[0], "url"); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + if (args.size() > 1) + { + format = checkAndGetLiteralArgument(args[1], "format_name"); + } + + if (with_structure) + { + if (args.size() > 2) + { + structure = checkAndGetLiteralArgument(args[2], "structure"); + } + if (args.size() > 3) + { + compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); + } + } + else if (args.size() > 2) + { + compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); + } + + setURL(url_str); +} + +void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & collection) +{ + std::string url_str; + + auto filename = collection.getOrDefault("filename", ""); + if (!filename.empty()) + url_str = std::filesystem::path(collection.get("url")) / filename; + else + url_str = collection.get("url"); + + format = collection.getOrDefault("format", "auto"); + compression_method = collection.getOrDefault("compression_method", + collection.getOrDefault("compression", "auto")); + structure = collection.getOrDefault("structure", "auto"); + + setURL(url_str); +} + +void StorageHDFSConfiguration::setURL(const std::string & url_) +{ + auto pos = url_.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + pos = url_.find('/', pos + 2); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + path = url_.substr(pos + 1); + if (!path.starts_with('/')) + path = '/' + path; + + url = url_.substr(0, pos); + paths = {path}; + + LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path); +} + +void StorageHDFSConfiguration::addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + size_t count = args.size(); + if (count == 0 || count > 4) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected 1 to 4 arguments in table function, got {}", count); + } + + auto format_literal = std::make_shared(format_); + auto structure_literal = std::make_shared(structure_); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + /// hdfs(url) + if (count == 1) + { + /// Add format=auto before structure argument. 
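/// e.g. hdfs('hdfs://host:9000/t.csv') (an illustrative url) is rewritten to hdfs('hdfs://host:9000/t.csv', 'auto', '<inferred structure>').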
+ args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// hdfs(url, format) + else if (count == 2) + { + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args.back() = format_literal; + args.push_back(structure_literal); + } + /// hdfs(url, format, structure) + /// hdfs(url, format, structure, compression_method) + else if (count >= 3) + { + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; + } + } +} + +} + +#endif diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h new file mode 100644 index 00000000000..01a8b9c5e3b --- /dev/null +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -0,0 +1,60 @@ +#pragma once +#include "config.h" + +#if USE_HDFS +#include +#include +#include + +namespace DB +{ + +class StorageHDFSConfiguration : public StorageObjectStorage::Configuration +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + static constexpr auto type_name = "hdfs"; + static constexpr auto engine_name = "HDFS"; + + StorageHDFSConfiguration() = default; + StorageHDFSConfiguration(const StorageHDFSConfiguration & other); + + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + + Path getPath() const override { return path; } + void setPath(const Path & path_) override { path = path_; } + + const Paths & getPaths() const override { return paths; } + void setPaths(const Paths & paths_) override { paths = paths_; } + std::string getPathWithoutGlobs() const override; + + String getNamespace() const override { return ""; } + String getDataSourceDescription() const override { return url; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; + + void check(ContextPtr context) const override; + ConfigurationPtr clone() override { return std::make_shared(*this); } + + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; + +private: + void fromNamedCollection(const NamedCollection &) override; + void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; + void setURL(const std::string & url_); + + String url; + String path; + std::vector paths; +}; + +} + +#endif diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp similarity index 98% rename from src/Storages/HDFS/HDFSCommon.cpp rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp index f9a55a1285a..365828bcc40 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp @@ -1,4 +1,4 @@ -#include +#include "HDFSCommon.h" #include #include #include @@ -113,7 +113,6 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A const Poco::URI uri(uri_str); const auto & host = uri.getHost(); auto port = uri.getPort(); - const String path = "//"; if (host.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal HDFS URI: {}", uri.toString()); diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/ObjectStorage/HDFS/HDFSCommon.h similarity index 100% rename from src/Storages/HDFS/HDFSCommon.h rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.h diff --git 
a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp similarity index 98% rename from src/Storages/HDFS/ReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index 4df05d47003..be339d021dc 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -1,11 +1,12 @@ #include "ReadBufferFromHDFS.h" #if USE_HDFS -#include +#include "HDFSCommon.h" #include #include #include #include +#include #include #include @@ -55,10 +56,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) - , builder(createHDFSBuilder(hdfs_uri_, config_)) , read_settings(read_settings_) , read_until_position(read_until_position_) { + builder = createHDFSBuilder(hdfs_uri_, config_); fs = createHDFSFS(builder.get()); fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0); @@ -100,7 +101,9 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory {})", file_offset, read_until_position - 1); @@ -145,6 +148,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemoryadd(bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); + return true; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/ReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp similarity index 93% rename from src/Storages/HDFS/WriteBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp index 173dd899ada..2c14b38ce01 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp @@ -2,8 +2,8 @@ #if USE_HDFS -#include -#include +#include "WriteBufferFromHDFS.h" +#include "HDFSCommon.h" #include #include #include @@ -48,12 +48,13 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); const String path = hdfs_uri.substr(begin_of_path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); if (fout == nullptr) { - throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} error: {}", - path, std::string(hdfsGetLastError())); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} ({}) error: {}", + path, hdfs_uri, std::string(hdfsGetLastError())); } } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/WriteBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp new file mode 100644 index 00000000000..78cdc442f64 --- /dev/null +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -0,0 +1,289 @@ +#include +#include +#include +#include + + +namespace 
DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; +} + +ReadBufferIterator::ReadBufferIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const std::optional & format_settings_, + SchemaCache & schema_cache_, + ObjectInfos & read_keys_, + const ContextPtr & context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , file_iterator(file_iterator_) + , format_settings(format_settings_) + , query_settings(configuration->getQuerySettings(context_)) + , schema_cache(schema_cache_) + , read_keys(read_keys_) + , prev_read_keys_size(read_keys_.size()) +{ + if (configuration->format != "auto") + format = configuration->format; +} + +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const +{ + auto source = StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, object_info); + return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); +} + +SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const +{ + Strings sources; + sources.reserve(read_keys.size()); + std::transform( + read_keys.begin(), read_keys.end(), + std::back_inserter(sources), + [&](const auto & elem) + { + return StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, *elem); + }); + return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); +} + +std::optional ReadBufferIterator::tryGetColumnsFromCache( + const ObjectInfos::iterator & begin, + const ObjectInfos::iterator & end) +{ + if (!query_settings.schema_inference_use_cache) + return std::nullopt; + + for (auto it = begin; it < end; ++it) + { + const auto & object_info = (*it); + auto get_last_mod_time = [&] -> std::optional + { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); + if (!object_info->metadata) + object_info->metadata = object_storage->tryGetObjectMetadata(path); + + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; + }; + + if (format) + { + const auto cache_key = getKeyForSchemaCache(*object_info, *format); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + const auto cache_key = getKeyForSchemaCache(*object_info, format_name); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
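/// e.g. if the schema cache already holds columns for this file under the 'Parquet' key, Parquet is assumed for every remaining file as well (the format name here is only an illustration).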
+ format = format_name; + return columns; + } + } + } + } + return std::nullopt; +} + +void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) +{ + if (query_settings.schema_inference_use_cache) + schema_cache.addNumRows(getKeyForSchemaCache(*current_object_info, *format), num_rows); +} + +void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) + { + schema_cache.addColumns(getKeyForSchemaCache(*current_object_info, *format), columns); + } +} + +void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + schema_cache.addManyColumns(getKeysForSchemaCache(), columns); + } +} + +void ReadBufferIterator::setFormatName(const String & format_name) +{ + format = format_name; +} + +String ReadBufferIterator::getLastFileName() const +{ + if (current_object_info) + return current_object_info->getPath(); + else + return ""; +} + +std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() +{ + auto context = getContext(); + + const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath(); + auto impl = object_storage->readObject(StoredObject(path), context->getReadSettings()); + + const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); + const auto zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); + + return wrapReadBufferWithCompressionMethod(std::move(impl), compression_method, zstd_window_log_max); +} + +ReadBufferIterator::Data ReadBufferIterator::next() +{ + if (first) + { + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) + { + for (const auto & object_info : read_keys) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName())) + { + format = format_from_file_name; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + { + return {nullptr, cached_columns, format}; + } + } + } + + while (true) + { + current_object_info = file_iterator->next(0); + + if (!current_object_info) + { + if (first) + { + if (format.has_value()) + { + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, " + "because there are no files with provided path " + "in {} or all files are empty. You can specify table structure manually", + *format, object_storage->getName()); + } + + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, " + "because there are no files with provided path " + "in {} or all files are empty. 
You can specify the format manually", + object_storage->getName()); + } + + return {nullptr, std::nullopt, format}; + } + + const auto filename = current_object_info->getFileName(); + chassert(!filename.empty()); + + /// file iterator could get new keys after new iteration + if (read_keys.size() > prev_read_keys_size) + { + /// If format is unknown we can try to determine it by new file names. + if (!format) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) + { + format = format_from_file_name; + break; + } + } + } + + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + + prev_read_keys_size = read_keys.size(); + } + + if (query_settings.skip_empty_files + && current_object_info->metadata && current_object_info->metadata->size_bytes == 0) + continue; + + /// In union mode, check cached columns only for current key. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos objects{current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(objects.begin(), objects.end())) + { + first = false; + return {nullptr, columns_from_cache, format}; + } + } + + std::unique_ptr read_buf; + CompressionMethod compression_method; + using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; + if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) + { + compression_method = chooseCompressionMethod(filename, configuration->compression_method); + const auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(filename, configuration->compression_method); + read_buf = object_storage->readObject( + StoredObject(current_object_info->getPath()), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + } + + if (!query_settings.skip_empty_files || !read_buf->eof()) + { + first = false; + + read_buf = wrapReadBufferWithCompressionMethod( + std::move(read_buf), + compression_method, + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buf), std::nullopt, format}; + } + } +} +} diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h new file mode 100644 index 00000000000..6eeb52ec2ed --- /dev/null +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -0,0 +1,63 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +class ReadBufferIterator : public IReadBufferIterator, WithContext +{ +public: + using FileIterator = std::shared_ptr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; + using ObjectInfos = StorageObjectStorage::ObjectInfos; + + ReadBufferIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const 
std::optional & format_settings_, + SchemaCache & schema_cache_, + ObjectInfos & read_keys_, + const ContextPtr & context_); + + Data next() override; + + void setNumRowsToLastFile(size_t num_rows) override; + + void setSchemaToLastFile(const ColumnsDescription & columns) override; + + void setResultingSchema(const ColumnsDescription & columns) override; + + String getLastFileName() const override; + + void setFormatName(const String & format_name) override; + + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override; + +private: + SchemaCache::Key getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const; + SchemaCache::Keys getKeysForSchemaCache() const; + std::optional tryGetColumnsFromCache( + const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); + + ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const FileIterator file_iterator; + const std::optional & format_settings; + const StorageObjectStorage::QuerySettings query_settings; + SchemaCache & schema_cache; + ObjectInfos & read_keys; + std::optional format; + + size_t prev_read_keys_size; + ObjectInfoPtr current_object_info; + bool first = true; +}; +} diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp new file mode 100644 index 00000000000..4b217b94730 --- /dev/null +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -0,0 +1,472 @@ +#include + +#if USE_AWS_S3 +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; +} + +static const std::unordered_set required_configuration_keys = { + "url", +}; + +static const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "compression_method", + "structure", + "access_key_id", + "secret_access_key", + "session_token", + "filename", + "use_environment_credentials", + "max_single_read_retries", + "min_upload_part_size", + "upload_part_size_multiply_factor", + "upload_part_size_multiply_parts_count_threshold", + "max_single_part_upload_size", + "max_connections", + "expiration_window_seconds", + "no_sign_request" +}; + +String StorageS3Configuration::getDataSourceDescription() const +{ + return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; +} + +std::string StorageS3Configuration::getPathInArchive() const +{ + if (url.archive_pattern.has_value()) + return url.archive_pattern.value(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not an archive", getPath()); +} + +void StorageS3Configuration::check(ContextPtr context) const +{ + validateNamespace(url.bucket); + context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); + context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); + Configuration::check(context); +} + +void StorageS3Configuration::validateNamespace(const String & name) const +{ + S3::URI::validateBucket(name, {}); +} + +StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) + : Configuration(other) +{ + url = other.url; + static_configuration = other.static_configuration; + headers_from_ast = other.headers_from_ast; + keys = other.keys; +} + +StorageObjectStorage::QuerySettings StorageS3Configuration::getQuerySettings(const 
ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.s3_truncate_on_insert, + .create_new_file_on_insert = settings.s3_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, + .list_object_keys_size = settings.s3_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, + }; +} + +ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT +{ + assertInitialized(); + + const auto & config = context->getConfigRef(); + const auto & settings = context->getSettingsRef(); + const std::string config_prefix = "s3."; + + auto s3_settings = getSettings(config, config_prefix, context, settings.s3_validate_request_settings); + + request_settings.updateFromSettingsIfChanged(settings); + auth_settings.updateFrom(s3_settings->auth_settings); + + s3_settings->auth_settings = auth_settings; + s3_settings->request_settings = request_settings; + + if (!headers_from_ast.empty()) + { + s3_settings->auth_settings.headers.insert( + s3_settings->auth_settings.headers.end(), + headers_from_ast.begin(), headers_from_ast.end()); + } + + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName())) + s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + + auto client = getClient(config, config_prefix, context, *s3_settings, false, &url); + auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); + auto s3_capabilities = S3Capabilities + { + .support_batch_delete = config.getBool("s3.support_batch_delete", true), + .support_proxy = config.getBool("s3.support_proxy", config.has("s3.proxy")), + }; + + return std::make_shared( + std::move(client), std::move(s3_settings), url, s3_capabilities, + key_generator, "StorageS3", false); +} + +void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection) +{ + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + + auto filename = collection.getOrDefault("filename", ""); + if (!filename.empty()) + url = S3::URI(std::filesystem::path(collection.get("url")) / filename); + else + url = S3::URI(collection.get("url")); + + auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); + auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); + auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); + auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); + auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); + + format = collection.getOrDefault("format", format); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + structure = collection.getOrDefault("structure", "auto"); + + request_settings = S3Settings::RequestSettings(collection); + + static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + + keys = {url.key}; +} + +void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool 
with_structure) +{ + /// Supported signatures: S3('url') S3('url', 'format') S3('url', 'format', 'compression') S3('url', NOSIGN) S3('url', NOSIGN, 'format') S3('url', NOSIGN, 'format', 'compression') S3('url', 'aws_access_key_id', 'aws_secret_access_key') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') + /// with optional headers() function + + size_t count = StorageURL::evalArgsAndCollectHeaders(args, headers_from_ast, context); + + if (count == 0 || count > (with_structure ? 7 : 6)) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage S3 requires 1 to 5 arguments: " + "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); + + std::unordered_map engine_args_to_idx; + bool no_sign_request = false; + + /// For 2 arguments we support 2 possible variants: + /// - s3(source, format) + /// - s3(source, NOSIGN) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. + if (count == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + no_sign_request = true; + else + engine_args_to_idx = {{"format", 1}}; + } + /// For 3 arguments we support 2 possible variants: + /// - s3(source, format, compression_method) + /// - s3(source, access_key_id, secret_access_key) + /// - s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. + else if (count == 3) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + engine_args_to_idx = {{"format", 2}}; + } + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) + { + if (with_structure) + engine_args_to_idx = {{"format", 1}, {"structure", 2}}; + else + engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; + } + else + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; + } + /// For 4 arguments we support 3 possible variants: + /// if with_structure == 0: + /// - s3(source, access_key_id, secret_access_key, session_token) + /// - s3(source, access_key_id, secret_access_key, format) + /// - s3(source, NOSIGN, format, compression_method) + /// if with_structure == 1: + /// - s3(source, format, structure, compression_method), + /// - s3(source, access_key_id, secret_access_key, format), + /// - s3(source, access_key_id, secret_access_key, session_token) + /// - s3(source, NOSIGN, format, structure) + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
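/// e.g. (illustrative values) s3(url, NOSIGN, 'CSV', 'gzip') in the engine form maps to {format: 2, compression_method: 3}, while s3(url, access_key_id, secret_access_key, 'CSV') maps to {access_key_id: 1, secret_access_key: 2, format: 3}.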
+ else if (count == 4) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "access_key_id/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + if (with_structure) + engine_args_to_idx = {{"format", 2}, {"structure", 3}}; + else + engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; + } + else if (with_structure && (second_arg == "auto" || FormatFactory::instance().exists(second_arg))) + { + engine_args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } + } + /// For 5 arguments we support 2 possible variants: + /// if with_structure == 0: + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, compression) + /// if with_structure == 1: + /// - s3(source, access_key_id, secret_access_key, format, structure) + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, NOSIGN, format, structure, compression_method) + else if (count == 5) + { + if (with_structure) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + engine_args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression_method", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else if (count == 6) + { + if (with_structure) + { + /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) + /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) + /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; + } + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; + } + } + else if (with_structure && count == 7) + { + engine_args_to_idx = {{"access_key_id", 1}, 
{"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; + } + + /// This argument is always the first + url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + + if (engine_args_to_idx.contains("format")) + { + format = checkAndGetLiteralArgument(args[engine_args_to_idx["format"]], "format"); + /// Set format to configuration only of it's not 'auto', + /// because we can have default format set in configuration. + if (format != "auto") + format = format; + } + + if (engine_args_to_idx.contains("structure")) + structure = checkAndGetLiteralArgument(args[engine_args_to_idx["structure"]], "structure"); + + if (engine_args_to_idx.contains("compression_method")) + compression_method = checkAndGetLiteralArgument(args[engine_args_to_idx["compression_method"]], "compression_method"); + + if (engine_args_to_idx.contains("access_key_id")) + auth_settings.access_key_id = checkAndGetLiteralArgument(args[engine_args_to_idx["access_key_id"]], "access_key_id"); + + if (engine_args_to_idx.contains("secret_access_key")) + auth_settings.secret_access_key = checkAndGetLiteralArgument(args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + + if (engine_args_to_idx.contains("session_token")) + auth_settings.session_token = checkAndGetLiteralArgument(args[engine_args_to_idx["session_token"]], "session_token"); + + if (no_sign_request) + auth_settings.no_sign_request = no_sign_request; + + static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + auth_settings.no_sign_request = no_sign_request; + + keys = {url.key}; +} + +void StorageS3Configuration::addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + HTTPHeaderEntries tmp_headers; + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); + + if (count == 0 || count > 6) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to 6 arguments in table function, got {}", count); + + auto format_literal = std::make_shared(format_); + auto structure_literal = std::make_shared(structure_); + + /// s3(s3_url) + if (count == 1) + { + /// Add format=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(s3_url, format) or s3(s3_url, NOSIGN) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. + else if (count == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// If there is NOSIGN, add format=auto before structure. + if (boost::iequals(second_arg, "NOSIGN")) + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(source, format, structure) or + /// s3(source, access_key_id, secret_access_key) or + /// s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. 
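/// e.g. (illustrative values) s3(url, 'CSV', 'auto') gets its third argument overwritten with the inferred structure; s3(url, access_key_id, secret_access_key) gets 'auto' and the structure appended; s3(url, NOSIGN, 'CSV') gets only the structure appended.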
+        else if (count == 3)
+        {
+            auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
+            if (boost::iequals(second_arg, "NOSIGN"))
+            {
+                args.push_back(structure_literal);
+            }
+            else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg))
+            {
+                args[count - 1] = structure_literal;
+            }
+            else
+            {
+                /// Add format=auto before the structure argument.
+                args.push_back(std::make_shared<ASTLiteral>("auto"));
+                args.push_back(structure_literal);
+            }
+        }
+        /// s3(source, format, structure, compression_method) or
+        /// s3(source, access_key_id, secret_access_key, format) or
+        /// s3(source, NOSIGN, format, structure)
+        /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, a format name, or neither.
+        else if (count == 4)
+        {
+            auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
+            if (boost::iequals(second_arg, "NOSIGN"))
+            {
+                args[count - 1] = structure_literal;
+            }
+            else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg))
+            {
+                args[count - 2] = structure_literal;
+            }
+            else
+            {
+                args.push_back(structure_literal);
+            }
+        }
+        /// s3(source, access_key_id, secret_access_key, format, structure) or
+        /// s3(source, NOSIGN, format, structure, compression_method)
+        /// We can distinguish them by looking at the 2-nd argument: check if it's the NOSIGN keyword or not.
+        else if (count == 5)
+        {
+            auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
+            if (boost::iequals(second_arg, "NOSIGN"))
+            {
+                args[count - 2] = structure_literal;
+            }
+            else
+            {
+                args[count - 1] = structure_literal;
+            }
+        }
+        /// s3(source, access_key_id, secret_access_key, format, structure, compression)
+        else if (count == 6)
+        {
+            args[count - 2] = structure_literal;
+        }
+    }
+}
+
+}
+
+#endif
diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h
new file mode 100644
index 00000000000..906d10a1a9a
--- /dev/null
+++ b/src/Storages/ObjectStorage/S3/Configuration.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include "config.h"
+
+#if USE_AWS_S3
+#include
+#include
+
+namespace DB
+{
+
+class StorageS3Configuration : public StorageObjectStorage::Configuration
+{
+public:
+    using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
+
+    static constexpr auto type_name = "s3";
+    static constexpr auto namespace_name = "bucket";
+
+    StorageS3Configuration() = default;
+    StorageS3Configuration(const StorageS3Configuration & other);
+
+    std::string getTypeName() const override { return type_name; }
+    std::string getEngineName() const override { return url.storage_name; }
+    std::string getNamespaceType() const override { return namespace_name; }
+
+    Path getPath() const override { return url.key; }
+    void setPath(const Path & path) override { url.key = path; }
+
+    const Paths & getPaths() const override { return keys; }
+    void setPaths(const Paths & paths) override { keys = paths; }
+
+    String getNamespace() const override { return url.bucket; }
+    String getDataSourceDescription() const override;
+    StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override;
+
+    bool isArchive() const override { return url.archive_pattern.has_value(); }
+    std::string getPathInArchive() const override;
+
+    void check(ContextPtr context) const override;
+    void validateNamespace(const String & name) const override;
+    ConfigurationPtr clone() override { return std::make_shared<StorageS3Configuration>(*this); }
+    bool isStaticConfiguration() const override { return static_configuration; }
+
ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure, + const String & format, + ContextPtr context) override; + +private: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + + S3::URI url; + std::vector keys; + + S3::AuthSettings auth_settings; + S3Settings::RequestSettings request_settings; + HTTPHeaderEntries headers_from_ast; /// Headers from ast is a part of static configuration. + /// If s3 configuration was passed from ast, then it is static. + /// If from config - it can be changed with config reload. + bool static_configuration = true; +}; + +} + +#endif diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp new file mode 100644 index 00000000000..2c8e60b49d0 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -0,0 +1,503 @@ +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DATABASE_ACCESS_DENIED; + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; +} + +StorageObjectStorage::StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + ContextPtr context, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_, + ASTPtr partition_by_) + : IStorage(table_id_) + , configuration(configuration_) + , object_storage(object_storage_) + , format_settings(format_settings_) + , partition_by(partition_by_) + , distributed_processing(distributed_processing_) + , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) +{ + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context); + configuration->check(context); + + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); + metadata.setComment(comment); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(metadata); +} + +String StorageObjectStorage::getName() const +{ + return configuration->getEngineName(); +} + +bool StorageObjectStorage::prefersLargeBlocks() const +{ + return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format); +} + +bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const +{ + return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context); +} + +bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings); +} + +void StorageObjectStorage::updateConfiguration(ContextPtr context) +{ + IObjectStorage::ApplyNewSettingsOptions options{ .allow_client_change = !configuration->isStaticConfiguration() }; + object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context, options); +} + +namespace 
+{ +class ReadFromObjectStorageStep : public SourceStepWithFilter +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + ReadFromObjectStorageStep( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const Names & columns_to_read, + const NamesAndTypesList & virtual_columns_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const std::optional & format_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + const bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) + , object_storage(object_storage_) + , configuration(configuration_) + , info(std::move(info_)) + , virtual_columns(virtual_columns_) + , format_settings(format_settings_) + , name(name_ + "Source") + , need_only_count(need_only_count_) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + , distributed_processing(distributed_processing_) + { + } + + std::string getName() const override { return name; } + + void applyFilters(ActionDAGNodes added_filter_nodes) override + { + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + createIterator(predicate); + } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + createIterator(nullptr); + + Pipes pipes; + auto context = getContext(); + const size_t max_threads = context->getSettingsRef().max_threads; + size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); + + if (estimated_keys_count > 1) + num_streams = std::min(num_streams, estimated_keys_count); + else + { + /// The amount of keys (zero) was probably underestimated. + /// We will keep one stream for this particular case. + num_streams = 1; + } + + const size_t max_parsing_threads = num_streams >= max_threads ? 
1 : (max_threads / std::max(num_streams, 1ul)); + + for (size_t i = 0; i < num_streams; ++i) + { + auto source = std::make_shared( + getName(), object_storage, configuration, info, format_settings, + context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count); + + source->setKeyCondition(filter_actions_dag, context); + pipes.emplace_back(std::move(source)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if (pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); + } + +private: + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::shared_ptr iterator_wrapper; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const String name; + const bool need_only_count; + const size_t max_block_size; + size_t num_streams; + const bool distributed_processing; + + void createIterator(const ActionsDAG::Node * predicate) + { + if (iterator_wrapper) + return; + auto context = getContext(); + iterator_wrapper = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, distributed_processing, + context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); + } +}; +} + +void StorageObjectStorage::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + updateConfiguration(local_context); + if (partition_by && configuration->withPartitionWildcard()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from a partitioned {} storage is not implemented yet", + getName()); + } + + const auto read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && local_context->getSettingsRef().optimize_count_from_files; + + auto read_step = std::make_unique( + object_storage, + configuration, + getName(), + column_names, + getVirtualsList(), + query_info, + storage_snapshot, + format_settings, + distributed_processing, + read_from_format_info, + need_only_count, + local_context, + max_block_size, + num_streams); + + query_plan.addStep(std::move(read_step)); +} + +SinkToStoragePtr StorageObjectStorage::write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + bool /* async_insert */) +{ + updateConfiguration(local_context); + const auto sample_block = metadata_snapshot->getSampleBlock(); + const auto & settings = configuration->getQuerySettings(local_context); + + if (configuration->isArchive()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. 
Write into archive is not supported", + configuration->getPath()); + } + + if (configuration->withGlobsIgnorePartitionWildcard()) + { + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Path '{}' contains globs, so the table is in readonly mode", + configuration->getPath()); + } + + if (configuration->withPartitionWildcard()) + { + ASTPtr partition_by_ast = nullptr; + if (auto insert_query = std::dynamic_pointer_cast(query)) + { + if (insert_query->partition_by) + partition_by_ast = insert_query->partition_by; + else + partition_by_ast = partition_by; + } + + if (partition_by_ast) + { + return std::make_shared( + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + } + } + + auto paths = configuration->getPaths(); + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, settings, paths.front(), paths.size())) + { + paths.push_back(*new_key); + } + configuration->setPaths(paths); + + return std::make_shared( + object_storage, + configuration->clone(), + format_settings, + sample_block, + local_context); +} + +void StorageObjectStorage::truncate( + const ASTPtr & /* query */, + const StorageMetadataPtr & /* metadata_snapshot */, + ContextPtr /* context */, + TableExclusiveLockHolder & /* table_holder */) +{ + if (configuration->isArchive()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. Table cannot be truncated", + configuration->getPath()); + } + + if (configuration->withGlobs()) + { + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode and cannot be truncated", + getName(), configuration->getPath()); + } + + StoredObjects objects; + for (const auto & key : configuration->getPaths()) + objects.emplace_back(key); + + object_storage->removeObjectsIfExist(objects); +} + +std::unique_ptr StorageObjectStorage::createReadBufferIterator( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ObjectInfos & read_keys, + const ContextPtr & context) +{ + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + object_storage, + false/* distributed_processing */, + context, + {}/* predicate */, + {}/* virtual_columns */, + &read_keys); + + return std::make_unique( + object_storage, configuration, file_iterator, + format_settings, getSchemaCache(context, configuration->getTypeName()), read_keys, context); +} + +ColumnsDescription StorageObjectStorage::resolveSchemaFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); +} + +std::string StorageObjectStorage::resolveFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return detectFormatAndReadSchema(format_settings, *iterator, context).second; +} + +std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( + const ObjectStoragePtr & object_storage, + const 
ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); + configuration->format = format; + return std::pair(columns, format); +} + +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) +{ + if (storage_type_name == "s3") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_s3", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "hdfs") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_hdfs", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "azure") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_azure", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); +} + +void StorageObjectStorage::Configuration::initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); + + if (configuration.format == "auto") + { + configuration.format = FormatFactory::instance().tryGetFormatFromFileName( + configuration.isArchive() + ? 
configuration.getPathInArchive() + : configuration.getPath()).value_or("auto"); + } + else + FormatFactory::instance().checkFormatName(configuration.format); + + configuration.initialized = true; +} + +void StorageObjectStorage::Configuration::check(ContextPtr) const +{ + FormatFactory::instance().checkFormatName(format); +} + +StorageObjectStorage::Configuration::Configuration(const Configuration & other) +{ + format = other.format; + compression_method = other.compression_method; + structure = other.structure; +} + +bool StorageObjectStorage::Configuration::withPartitionWildcard() const +{ + static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos + || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; +} + +bool StorageObjectStorage::Configuration::withGlobsIgnorePartitionWildcard() const +{ + if (!withPartitionWildcard()) + return withGlobs(); + else + return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorage::Configuration::isPathWithGlobs() const +{ + return getPath().find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorage::Configuration::isNamespaceWithGlobs() const +{ + return getNamespace().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathWithoutGlobs() const +{ + return getPath().substr(0, getPath().find_first_of("*?{")); +} + +bool StorageObjectStorage::Configuration::isPathInArchiveWithGlobs() const +{ + return getPathInArchive().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathInArchive() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not archive", getPath()); +} + +void StorageObjectStorage::Configuration::assertInitialized() const +{ + if (!initialized) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); + } +} +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h new file mode 100644 index 00000000000..f45d8c1f01a --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -0,0 +1,204 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class ReadBufferIterator; +class SchemaCache; +class NamedCollection; + +/** + * A general class containing implementation for external table engines + * such as StorageS3, StorageAzure, StorageHDFS. + * Works with an object of IObjectStorage class. 
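+ *
+ * Roughly, and only as an illustration: a table engine such as S3(...) builds a concrete
+ * Configuration (e.g. StorageS3Configuration), the Configuration creates the matching
+ * IObjectStorage client, and StorageObjectStorage drives the shared read/write pipeline
+ * through those two abstractions, so the same code path serves S3, HDFS and Azure.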
+ */ +class StorageObjectStorage : public IStorage +{ +public: + class Configuration; + using ConfigurationPtr = std::shared_ptr; + using ObjectInfo = RelativePathWithMetadata; + using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; + + struct QuerySettings + { + /// Insert settings: + bool truncate_on_insert; + bool create_new_file_on_insert; + + /// Schema inference settings: + bool schema_inference_use_cache; + SchemaInferenceMode schema_inference_mode; + + /// List settings: + bool skip_empty_files; + size_t list_object_keys_size; + bool throw_on_zero_files_match; + bool ignore_non_existent_file; + }; + + StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + ContextPtr context_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_ = false, + ASTPtr partition_by_ = nullptr); + + String getName() const override; + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + SinkToStoragePtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + bool async_insert) override; + + void truncate( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + TableExclusiveLockHolder &) override; + + bool supportsPartitionBy() const override { return true; } + + bool supportsSubcolumns() const override { return true; } + + bool supportsDynamicSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } + + bool supportsSubsetOfColumns(const ContextPtr & context) const; + + bool prefersLargeBlocks() const override; + + bool parallelizeOutputAfterReading(ContextPtr context) const override; + + static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); + + static ColumnsDescription resolveSchemaFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + + static std::string resolveFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + + static std::pair resolveSchemaAndFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + +protected: + virtual void updateConfiguration(ContextPtr local_context); + + static std::unique_ptr createReadBufferIterator( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ObjectInfos & read_keys, + const ContextPtr & context); + + ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + const std::optional format_settings; + const ASTPtr partition_by; + const bool distributed_processing; + + LoggerPtr log; +}; + +class StorageObjectStorage::Configuration +{ +public: + Configuration() = default; + Configuration(const Configuration & other); + virtual ~Configuration() = default; + + 
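+
+    /// Illustrative flow of initialize(): engine arguments are read either from a named
+    /// collection or from the AST (fromNamedCollection() / fromAST()); if the format is still
+    /// 'auto' afterwards, it is guessed from the file name (or the path inside an archive),
+    /// otherwise the explicitly given format name is validated.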
using Path = std::string; + using Paths = std::vector; + + static void initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure); + + /// Storage type: s3, hdfs, azure. + virtual std::string getTypeName() const = 0; + /// Engine name: S3, HDFS, Azure. + virtual std::string getEngineName() const = 0; + /// Sometimes object storages have something similar to chroot or namespace, for example + /// buckets in S3. If object storage doesn't have any namepaces return empty string. + virtual std::string getNamespaceType() const { return "namespace"; } + + virtual Path getPath() const = 0; + virtual void setPath(const Path & path) = 0; + + virtual const Paths & getPaths() const = 0; + virtual void setPaths(const Paths & paths) = 0; + + virtual String getDataSourceDescription() const = 0; + virtual String getNamespace() const = 0; + + virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; + virtual void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; + + bool withPartitionWildcard() const; + bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } + bool withGlobsIgnorePartitionWildcard() const; + bool isPathWithGlobs() const; + bool isNamespaceWithGlobs() const; + virtual std::string getPathWithoutGlobs() const; + + virtual bool isArchive() const { return false; } + bool isPathInArchiveWithGlobs() const; + virtual std::string getPathInArchive() const; + + virtual void check(ContextPtr context) const; + virtual void validateNamespace(const String & /* name */) const {} + + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) = 0; + virtual ConfigurationPtr clone() = 0; + virtual bool isStaticConfiguration() const { return true; } + + String format = "auto"; + String compression_method = "auto"; + String structure = "auto"; + +protected: + virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; + + void assertInitialized() const; + + bool initialized = false; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp new file mode 100644 index 00000000000..78f568d8ae2 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -0,0 +1,100 @@ +#include "Storages/ObjectStorage/StorageObjectStorageCluster.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +StorageObjectStorageCluster::StorageObjectStorageCluster( + const String & cluster_name_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_) + : IStorageCluster( + cluster_name_, table_id_, getLogger(fmt::format("{}({})", configuration_->getEngineName(), table_id_.table_name))) + , configuration{configuration_} + , object_storage(object_storage_) +{ + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, context_); + configuration->check(context_); + + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); + + 
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(metadata); +} + +std::string StorageObjectStorageCluster::getName() const +{ + return configuration->getEngineName(); +} + +void StorageObjectStorageCluster::updateQueryToSendIfNeeded( + ASTPtr & query, + const DB::StorageSnapshotPtr & storage_snapshot, + const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected SELECT query from table function {}, got '{}'", + configuration->getEngineName(), queryToString(query)); + } + + ASTs & args = expression_list->children; + const auto & structure = storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(); + if (args.empty()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected empty list of arguments for {}Cluster table function", + configuration->getEngineName()); + } + + ASTPtr cluster_name_arg = args.front(); + args.erase(args.begin()); + configuration->addStructureAndFormatToArgs(args, structure, configuration->format, context); + args.insert(args.begin(), cluster_name_arg); +} + +RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & local_context) const +{ + auto iterator = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, /* distributed_processing */false, local_context, + predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); + + auto callback = std::make_shared>([iterator]() mutable -> String + { + auto object_info = iterator->next(0); + if (object_info) + return object_info->getPath(); + else + return ""; + }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h new file mode 100644 index 00000000000..69fec2b3c77 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -0,0 +1,48 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class Context; + +class StorageObjectStorageCluster : public IStorageCluster +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + StorageObjectStorageCluster( + const String & cluster_name_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_); + + std::string getName() const override; + + bool supportsSubcolumns() const override { return true; } + + bool supportsDynamicSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } + + RemoteQueryExecutor::Extension getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & context) const override; + +private: + void updateQueryToSendIfNeeded( + ASTPtr & query, + const StorageSnapshotPtr & storage_snapshot, + const ContextPtr & context) override; + + const String engine_name; + const StorageObjectStorage::ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + NamesAndTypesList virtual_columns; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp 
b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp new file mode 100644 index 00000000000..0a3cf19a590 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -0,0 +1,168 @@ +#include "StorageObjectStorageSink.h" +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_TEXT; + extern const int BAD_ARGUMENTS; +} + +StorageObjectStorageSink::StorageObjectStorageSink( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + const std::optional & format_settings_, + const Block & sample_block_, + ContextPtr context, + const std::string & blob_path) + : SinkToStorage(sample_block_) + , sample_block(sample_block_) +{ + const auto & settings = context->getSettingsRef(); + const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path; + const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method); + + auto buffer = object_storage->writeObject( + StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings()); + + write_buf = wrapWriteBufferWithCompressionMethod( + std::move(buffer), + chosen_compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); + + writer = FormatFactory::instance().getOutputFormatParallelIfPossible( + configuration->format, *write_buf, sample_block, context, format_settings_); +} + +void StorageObjectStorageSink::consume(Chunk chunk) +{ + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); +} + +void StorageObjectStorageSink::onCancel() +{ + std::lock_guard lock(cancel_mutex); + finalize(); + cancelled = true; +} + +void StorageObjectStorageSink::onException(std::exception_ptr exception) +{ + std::lock_guard lock(cancel_mutex); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to proper delete write buffers without finalization. + release(); + } +} + +void StorageObjectStorageSink::onFinish() +{ + std::lock_guard lock(cancel_mutex); + finalize(); +} + +void StorageObjectStorageSink::finalize() +{ + if (!writer) + return; + + try + { + writer->finalize(); + writer->flush(); + write_buf->finalize(); + } + catch (...) + { + /// Stop ParallelFormattingOutputFormat correctly. 
+ release(); + throw; + } +} + +void StorageObjectStorageSink::release() +{ + writer.reset(); + write_buf.reset(); +} + +PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by) + : PartitionedSink(partition_by, context_, sample_block_) + , object_storage(object_storage_) + , configuration(configuration_) + , query_settings(configuration_->getQuerySettings(context_)) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) +{ +} + +SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String & partition_id) +{ + auto partition_bucket = replaceWildcards(configuration->getNamespace(), partition_id); + validateNamespace(partition_bucket); + + auto partition_key = replaceWildcards(configuration->getPath(), partition_id); + validateKey(partition_key); + + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, query_settings, partition_key, /* sequence_number */1)) + { + partition_key = *new_key; + } + + return std::make_shared( + object_storage, + configuration, + format_settings, + sample_block, + context, + partition_key + ); +} + +void PartitionedStorageObjectStorageSink::validateKey(const String & str) +{ + /// See: + /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html + /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject + + if (str.empty() || str.size() > 1024) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); + + validatePartitionKey(str, true); +} + +void PartitionedStorageObjectStorageSink::validateNamespace(const String & str) +{ + configuration->validateNamespace(str); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); + + validatePartitionKey(str, false); +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h new file mode 100644 index 00000000000..45cf83d606f --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -0,0 +1,70 @@ +#pragma once +#include +#include +#include + +namespace DB +{ +class StorageObjectStorageSink : public SinkToStorage +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + StorageObjectStorageSink( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + const std::optional & format_settings_, + const Block & sample_block_, + ContextPtr context, + const std::string & blob_path = ""); + + String getName() const override { return "StorageObjectStorageSink"; } + + void consume(Chunk chunk) override; + + void onCancel() override; + + void onException(std::exception_ptr exception) override; + + void onFinish() override; + +private: + const Block sample_block; + std::unique_ptr write_buf; + OutputFormatPtr writer; + bool cancelled = false; + std::mutex cancel_mutex; + + void finalize(); + void release(); +}; + +class PartitionedStorageObjectStorageSink : public PartitionedSink +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; 
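+
+    /// Illustrative example: with a path template of 'data_{_partition_id}.csv' and
+    /// PARTITION BY toYear(date), createSinkForPartition("2024") expands the key to
+    /// 'data_2024.csv', validates the bucket and key, and writes that partition through
+    /// its own StorageObjectStorageSink.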
+ + PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by); + + SinkPtr createSinkForPartition(const String & partition_id) override; + +private: + void validateKey(const String & str); + void validateNamespace(const String & str); + + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + + const StorageObjectStorage::QuerySettings query_settings; + const std::optional format_settings; + const Block sample_block; + const ContextPtr context; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp new file mode 100644 index 00000000000..b31d0f8a92e --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -0,0 +1,783 @@ +#include "StorageObjectStorageSource.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace ProfileEvents +{ + extern const Event EngineFileLikeReadFiles; +} + +namespace CurrentMetrics +{ + extern const Metric StorageObjectStorageThreads; + extern const Metric StorageObjectStorageThreadsActive; + extern const Metric StorageObjectStorageThreadsScheduled; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_COMPILE_REGEXP; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int FILE_DOESNT_EXIST; +} + +StorageObjectStorageSource::StorageObjectStorageSource( + String name_, + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ReadFromFormatInfo & info, + const std::optional & format_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + size_t max_parsing_threads_, + bool need_only_count_) + : SourceWithKeyCondition(info.source_header, false) + , WithContext(context_) + , name(std::move(name_)) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , max_block_size(max_block_size_) + , need_only_count(need_only_count_) + , max_parsing_threads(max_parsing_threads_) + , read_from_format_info(info) + , create_reader_pool(std::make_shared( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, + 1/* max_threads */)) + , columns_desc(info.columns_description) + , file_iterator(file_iterator_) + , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) + , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) +{ +} + +StorageObjectStorageSource::~StorageObjectStorageSource() +{ + create_reader_pool->wait(); +} + +void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +{ + setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); +} + +std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info) +{ + auto path = object_info.getPath(); + if (path.starts_with("/")) + path = path.substr(1); + + if (include_connection_info) + return fs::path(configuration.getDataSourceDescription()) / path; + else + return 
fs::path(configuration.getNamespace()) / path; +} + +std::shared_ptr StorageObjectStorageSource::createFileIterator( + ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback) +{ + if (distributed_processing) + return std::make_shared( + local_context->getReadTaskCallback(), + local_context->getSettingsRef().max_threads); + + if (configuration->isNamespaceWithGlobs()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); + + auto settings = configuration->getQuerySettings(local_context); + const bool is_archive = configuration->isArchive(); + + std::unique_ptr iterator; + if (configuration->isPathWithGlobs()) + { + /// Iterate through disclosed globs and make a source for each file + iterator = std::make_unique( + object_storage, configuration, predicate, virtual_columns, + local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, + settings.throw_on_zero_files_match, file_progress_callback); + } + else + { + ConfigurationPtr copy_configuration = configuration->clone(); + auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + if (filter_dag) + { + auto keys = configuration->getPaths(); + std::vector paths; + paths.reserve(keys.size()); + for (const auto & key : keys) + paths.push_back(fs::path(configuration->getNamespace()) / key); + VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); + copy_configuration->setPaths(keys); + } + + iterator = std::make_unique( + object_storage, copy_configuration, virtual_columns, is_archive ? nullptr : read_keys, + settings.ignore_non_existent_file, file_progress_callback); + } + + if (is_archive) + { + return std::make_shared(object_storage, configuration, std::move(iterator), local_context, read_keys); + } + + return iterator; +} + +void StorageObjectStorageSource::lazyInitialize(size_t processor) +{ + if (initialized) + return; + + reader = createReader(processor); + if (reader) + reader_future = createReaderAsync(processor); + initialized = true; +} + +Chunk StorageObjectStorageSource::generate() +{ + lazyInitialize(0); + + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + total_rows_in_file += num_rows; + + size_t chunk_size = 0; + if (const auto * input_format = reader.getInputFormat()) + chunk_size = input_format->getApproxBytesReadForChunk(); + + progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); + + const auto & object_info = reader.getObjectInfo(); + const auto & filename = object_info.getFileName(); + chassert(object_info.metadata); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, + read_from_format_info.requested_virtual_columns, + getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), + object_info.metadata->size_bytes, &filename); + + return chunk; + } + + if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(reader.getObjectInfo(), total_rows_in_file); + + total_rows_in_file = 0; + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + create_reader_pool->wait(); + reader_future = createReaderAsync(); + } + + return {}; +} + +void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows) +{ + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, object_info), + configuration->format, + format_settings, + getContext()); + schema_cache.addNumRows(cache_key, num_rows); +} + +std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) +{ + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, object_info), + configuration->format, + format_settings, + getContext()); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info.metadata + ? std::optional(object_info.metadata->last_modified.epochTime()) + : std::nullopt; + }; + return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); +} + +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +{ + ObjectInfoPtr object_info; + auto query_settings = configuration->getQuerySettings(getContext()); + + do + { + object_info = file_iterator->next(processor); + + if (!object_info || object_info->getFileName().empty()) + return {}; + + if (!object_info->metadata) + { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); + object_info->metadata = object_storage->getObjectMetadata(path); + } + } + while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0); + + QueryPipelineBuilder builder; + std::shared_ptr source; + std::unique_ptr read_buf; + + std::optional num_rows_from_cache = need_only_count + && getContext()->getSettingsRef().use_cache_for_count_from_files + ? tryGetNumRowsFromCache(*object_info) + : std::nullopt; + + if (num_rows_from_cache) + { + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. 
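+        /// For instance (illustrative), a cached count of 1'000'000 rows with max_block_size = 65536
+        /// is emitted as ~16 bounded chunks rather than a single huge chunk.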
+ builder.init(Pipe(std::make_shared( + read_from_format_info.format_header, *num_rows_from_cache, max_block_size))); + } + else + { + CompressionMethod compression_method; + if (const auto * object_info_in_archive = dynamic_cast(object_info.get())) + { + compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); + const auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); + read_buf = createReadBuffer(*object_info); + } + + auto input_format = FormatFactory::instance().getInput( + configuration->format, + *read_buf, + read_from_format_info.format_header, + getContext(), + max_block_size, + format_settings, + need_only_count ? 1 : max_parsing_threads, + std::nullopt, + true/* is_remote_fs */, + compression_method, + need_only_count); + + if (key_condition) + input_format->setKeyCondition(key_condition); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared(header, columns_desc, *input_format, getContext()); + }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, read_from_format_info.requested_columns); + }); + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder( + object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); +} + +std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +{ + return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); +} + +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) +{ + const auto & object_size = object_info.metadata->size_bytes; + + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; + /// FIXME: Changing this setting to default value breaks something around parquet reading + read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; + + const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; + /// User's object may change, don't cache it. + read_settings.use_page_cache_for_disks_without_file_cache = false; + + // Create a read buffer that will prefetch the first ~1 MB of the file. + // When reading lots of tiny files, this prefetching almost doubles the throughput. + // For bigger files, parallel reading is more useful. 
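+    // Illustrative numbers: with max_download_buffer_size = 10 MiB, a 15 MiB object read via the
+    // 'threadpool' remote read method gets the initial prefetch, while a 50 MiB object (or any
+    // other read method) falls back to a plain synchronous read below.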
+ if (use_prefetch) + { + LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); + + auto async_reader = object_storage->readObjects( + StoredObjects{StoredObject{object_info.getPath(), /* local_path */ "", object_size}}, read_settings); + + async_reader->setReadUntilEnd(); + if (read_settings.remote_fs_prefetch) + async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); + + return async_reader; + } + else + { + /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. + return object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), read_settings); + } +} + +StorageObjectStorageSource::IIterator::IIterator(const std::string & logger_name_) + : logger(getLogger(logger_name_)) +{ +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) +{ + auto object_info = nextImpl(processor); + + if (object_info) + { + LOG_TEST(logger, "Next key: {}", object_info->getFileName()); + } + + return object_info; +} + +StorageObjectStorageSource::GlobIterator::GlobIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns_, + ContextPtr context_, + ObjectInfos * read_keys_, + size_t list_object_keys_size, + bool throw_on_zero_files_match_, + std::function file_progress_callback_) + : IIterator("GlobIterator") + , WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , virtual_columns(virtual_columns_) + , throw_on_zero_files_match(throw_on_zero_files_match_) + , read_keys(read_keys_) + , file_progress_callback(file_progress_callback_) +{ + if (configuration->isNamespaceWithGlobs()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + } + else if (configuration->isPathWithGlobs()) + { + const auto key_with_globs = configuration_->getPath(); + const auto key_prefix = configuration->getPathWithoutGlobs(); + object_storage_iterator = object_storage->iterate(key_prefix, list_object_keys_size); + + matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs)); + if (!matcher->ok()) + { + throw Exception( + ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", key_with_globs, matcher->error()); + } + + recursive = key_with_globs == "/**"; + filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Using glob iterator with path without globs is not allowed (used path: {})", + configuration->getPath()); + } +} + +size_t StorageObjectStorageSource::GlobIterator::estimatedKeysCount() +{ + if (object_infos.empty() && !is_finished && object_storage_iterator->isValid()) + { + /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); + /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do + /// as it would lead to serious slow down of the execution, since objects are going + /// to be fetched sequentially rather than in-parallel with up to times. 
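+        /// Returning an effectively unlimited estimate means the read step keeps the number of
+        /// streams requested by the query instead of capping it by the keys listed so far.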
+ return std::numeric_limits::max(); + } + return object_infos.size(); +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) +{ + std::lock_guard lock(next_mutex); + auto object_info = nextImplUnlocked(processor); + if (first_iteration && !object_info && throw_on_zero_files_match) + { + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, + "Can not match any files with path {}", + configuration->getPath()); + } + first_iteration = false; + return object_info; +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) +{ + bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); + if (is_finished && current_batch_processed) + return {}; + + if (current_batch_processed) + { + ObjectInfos new_batch; + while (new_batch.empty()) + { + auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); + if (!result.has_value()) + { + is_finished = true; + return {}; + } + + new_batch = std::move(result.value()); + for (auto it = new_batch.begin(); it != new_batch.end();) + { + if (!recursive && !re2::RE2::FullMatch((*it)->getPath(), *matcher)) + it = new_batch.erase(it); + else + ++it; + } + } + + index = 0; + + if (filter_dag) + { + std::vector paths; + paths.reserve(new_batch.size()); + for (const auto & object_info : new_batch) + { + chassert(object_info); + paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); + } + + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); + } + + if (read_keys) + read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); + + object_infos = std::move(new_batch); + + if (file_progress_callback) + { + for (const auto & object_info : object_infos) + { + chassert(object_info->metadata); + file_progress_callback(FileProgress(0, object_info->metadata->size_bytes)); + } + } + } + + if (index >= object_infos.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); + + return object_infos[index++]; +} + +StorageObjectStorageSource::KeysIterator::KeysIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + bool ignore_non_existent_files_, + std::function file_progress_callback_) + : IIterator("KeysIterator") + , object_storage(object_storage_) + , configuration(configuration_) + , virtual_columns(virtual_columns_) + , file_progress_callback(file_progress_callback_) + , keys(configuration->getPaths()) + , ignore_non_existent_files(ignore_non_existent_files_) +{ + if (read_keys_) + { + /// TODO: should we add metadata if we anyway fetch it if file_progress_callback is passed? 
+ for (auto && key : keys) + { + auto object_info = std::make_shared(key); + read_keys_->emplace_back(object_info); + } + } +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) +{ + while (true) + { + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= keys.size()) + return {}; + + auto key = keys[current_index]; + + ObjectMetadata object_metadata{}; + if (ignore_non_existent_files) + { + auto metadata = object_storage->tryGetObjectMetadata(key); + if (!metadata) + continue; + } + else + object_metadata = object_storage->getObjectMetadata(key); + + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); + + return std::make_shared(key, object_metadata); + } +} + +StorageObjectStorageSource::ReaderHolder::ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : object_info(std::move(object_info_)) + , read_buf(std::move(read_buf_)) + , source(std::move(source_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) +{ +} + +StorageObjectStorageSource::ReaderHolder & +StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept +{ + /// The order of destruction is important. + /// reader uses pipeline, pipeline uses read_buf. + reader = std::move(other.reader); + pipeline = std::move(other.pipeline); + source = std::move(other.source); + read_buf = std::move(other.read_buf); + object_info = std::move(other.object_info); + return *this; +} + +StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( + const ReadTaskCallback & callback_, size_t max_threads_count) + : IIterator("ReadTaskIterator") + , callback(callback_) +{ + ThreadPool pool( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, max_threads_count); + + auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "ReadTaskIter"); + + std::vector> keys; + keys.reserve(max_threads_count); + for (size_t i = 0; i < max_threads_count; ++i) + keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); + + pool.wait(); + buffer.reserve(max_threads_count); + for (auto & key_future : keys) + { + auto key = key_future.get(); + if (!key.empty()) + buffer.emplace_back(std::make_shared(key, std::nullopt)); + } +} + +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) +{ + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= buffer.size()) + return std::make_shared(callback()); + + return buffer[current_index]; +} + +static IArchiveReader::NameFilter createArchivePathFilter(const std::string & archive_pattern) +{ + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern)); + if (!matcher->ok()) + { + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", + archive_pattern, matcher->error()); + } + return [matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }; +} + +StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_) + : archive_object(archive_object_) + , path_in_archive(path_in_archive_) + , archive_reader(archive_reader_) +{ 
+} + +StorageObjectStorageSource::ArchiveIterator::ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_) + : IIterator("ArchiveIterator") + , WithContext(context_) + , object_storage(object_storage_) + , is_path_in_archive_with_globs(configuration_->isPathInArchiveWithGlobs()) + , archives_iterator(std::move(archives_iterator_)) + , filter(is_path_in_archive_with_globs ? createArchivePathFilter(configuration_->getPathInArchive()) : IArchiveReader::NameFilter{}) + , path_in_archive(is_path_in_archive_with_globs ? "" : configuration_->getPathInArchive()) + , read_keys(read_keys_) +{ +} + +std::shared_ptr +StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr object_info) const +{ + const auto size = object_info->metadata->size_bytes; + return DB::createArchiveReader( + /* path_to_archive */object_info->getPath(), + /* archive_read_function */[=, this]() + { + StoredObject stored_object(object_info->getPath(), "", size); + return object_storage->readObject(stored_object, getContext()->getReadSettings()); + }, + /* archive_size */size); +} + +StorageObjectStorageSource::ObjectInfoPtr +StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) +{ + std::unique_lock lock{next_mutex}; + while (true) + { + if (filter) + { + if (!file_enumerator) + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + archive_reader = createArchiveReader(archive_object); + file_enumerator = archive_reader->firstFile(); + if (!file_enumerator) + continue; + } + else if (!file_enumerator->nextFile()) + { + file_enumerator.reset(); + continue; + } + + path_in_archive = file_enumerator->getFileName(); + if (!filter(path_in_archive)) + continue; + } + else + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + if (!archive_object->metadata) + archive_object->metadata = object_storage->getObjectMetadata(archive_object->getPath()); + + archive_reader = createArchiveReader(archive_object); + if (!archive_reader->fileExists(path_in_archive)) + continue; + } + + auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; + } +} + +size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() +{ + return archives_iterator->estimatedKeysCount(); +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h new file mode 100644 index 00000000000..fd7c7aa7102 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -0,0 +1,310 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class SchemaCache; + +class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext +{ + friend class StorageS3QueueSource; +public: + using Configuration = StorageObjectStorage::Configuration; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; + using ObjectInfos = StorageObjectStorage::ObjectInfos; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + + class IIterator; + class ReadTaskIterator; + class GlobIterator; + class KeysIterator; + class ArchiveIterator; + + StorageObjectStorageSource( + String name_, + 
ObjectStoragePtr object_storage_, + ConfigurationPtr configuration, + const ReadFromFormatInfo & info, + const std::optional & format_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + size_t max_parsing_threads_, + bool need_only_count_); + + ~StorageObjectStorageSource() override; + + String getName() const override { return name; } + + void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; + + Chunk generate() override; + + static std::shared_ptr createFileIterator( + ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback = {}); + + static std::string getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info = true); + +protected: + const String name; + ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const std::optional format_settings; + const UInt64 max_block_size; + const bool need_only_count; + const size_t max_parsing_threads; + const ReadFromFormatInfo read_from_format_info; + const std::shared_ptr create_reader_pool; + + ColumnsDescription columns_desc; + std::shared_ptr file_iterator; + SchemaCache & schema_cache; + bool initialized = false; + size_t total_rows_in_file = 0; + LoggerPtr log = getLogger("StorageObjectStorageSource"); + + struct ReaderHolder : private boost::noncopyable + { + public: + ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_); + + ReaderHolder() = default; + ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } + ReaderHolder & operator=(ReaderHolder && other) noexcept; + + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return reader.get(); } + + const ObjectInfo & getObjectInfo() const { return *object_info; } + const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } + + private: + ObjectInfoPtr object_info; + std::unique_ptr read_buf; + std::shared_ptr source; + std::unique_ptr pipeline; + std::unique_ptr reader; + }; + + ReaderHolder reader; + ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; + std::future reader_future; + + /// Recreate ReadBuffer and Pipeline for each file. 
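// Illustrative sketch (not part of this patch): why ReaderHolder declares read_buf
// before pipeline before reader, and why its move assignment resets members in the
// opposite order. C++ destroys non-static data members in reverse declaration order,
// so the executor (declared last) is torn down before the pipeline it runs, and the
// pipeline before the buffer it reads from.
#include <iostream>

struct Traced
{
    const char * name;
    explicit Traced(const char * name_) : name(name_) {}
    ~Traced() { std::cout << "destroying " << name << '\n'; }
};

struct HolderLikeReaderHolder
{
    Traced read_buf{"read_buf"};   // declared first, destroyed last
    Traced pipeline{"pipeline"};
    Traced reader{"reader"};       // declared last, destroyed first
};

int main()
{
    HolderLikeReaderHolder holder;
    // prints: destroying reader, destroying pipeline, destroying read_buf
}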
+ ReaderHolder createReader(size_t processor = 0); + std::future createReaderAsync(size_t processor = 0); + std::unique_ptr createReadBuffer(const ObjectInfo & object_info); + + void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); + void lazyInitialize(size_t processor); +}; + +class StorageObjectStorageSource::IIterator +{ +public: + explicit IIterator(const std::string & logger_name_); + + virtual ~IIterator() = default; + + virtual size_t estimatedKeysCount() = 0; + + ObjectInfoPtr next(size_t processor); + +protected: + virtual ObjectInfoPtr nextImpl(size_t processor) = 0; + LoggerPtr logger; +}; + +class StorageObjectStorageSource::ReadTaskIterator : public IIterator +{ +public: + ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); + + size_t estimatedKeysCount() override { return buffer.size(); } + +private: + ObjectInfoPtr nextImpl(size_t) override; + + ReadTaskCallback callback; + ObjectInfos buffer; + std::atomic_size_t index = 0; +}; + +class StorageObjectStorageSource::GlobIterator : public IIterator, WithContext +{ +public: + GlobIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns_, + ContextPtr context_, + ObjectInfos * read_keys_, + size_t list_object_keys_size, + bool throw_on_zero_files_match_, + std::function file_progress_callback_ = {}); + + ~GlobIterator() override = default; + + size_t estimatedKeysCount() override; + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + ObjectInfoPtr nextImplUnlocked(size_t processor); + void createFilterAST(const String & any_key); + void fillBufferForKey(const std::string & uri_key); + + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const bool throw_on_zero_files_match; + + size_t index = 0; + + ObjectInfos object_infos; + ObjectInfos * read_keys; + ActionsDAGPtr filter_dag; + ObjectStorageIteratorPtr object_storage_iterator; + bool recursive{false}; + std::vector expanded_keys; + std::vector::iterator expanded_keys_iter; + + std::unique_ptr matcher; + + bool is_finished = false; + bool first_iteration = true; + std::mutex next_mutex; + + std::function file_progress_callback; +}; + +class StorageObjectStorageSource::KeysIterator : public IIterator +{ +public: + KeysIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + bool ignore_non_existent_files_, + std::function file_progress_callback = {}); + + ~KeysIterator() override = default; + + size_t estimatedKeysCount() override { return keys.size(); } + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const std::function file_progress_callback; + const std::vector keys; + std::atomic index = 0; + bool ignore_non_existent_files; +}; + +/* + * An archives iterator. + * Allows to iterate files inside one or many archives. + * `archives_iterator` is an iterator which iterates over different archives. + * There are two ways to read files in archives: + * 1. When we want to read one concete file in each archive. 
+ * In this case we go through all archives, check if this certain file + * exists within this archive and read it if it exists. + * 2. When we have a certain pattern of files we want to read in each archive. + * For this purpose we create a filter defined as IArchiveReader::NameFilter. + */ +class StorageObjectStorageSource::ArchiveIterator : public IIterator, private WithContext +{ +public: + explicit ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_); + + size_t estimatedKeysCount() override; + + struct ObjectInfoInArchive : public ObjectInfo + { + ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_); + + std::string getFileName() const override + { + return path_in_archive; + } + + std::string getPath() const override + { + return archive_object->getPath() + "::" + path_in_archive; + } + + std::string getPathToArchive() const override + { + return archive_object->getPath(); + } + + bool isArchive() const override { return true; } + + const ObjectInfoPtr archive_object; + const std::string path_in_archive; + const std::shared_ptr archive_reader; + }; + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + std::shared_ptr createArchiveReader(ObjectInfoPtr object_info) const; + + const ObjectStoragePtr object_storage; + const bool is_path_in_archive_with_globs; + /// Iterator which iterates through different archives. + const std::unique_ptr archives_iterator; + /// Used when files inside archive are defined with a glob + const IArchiveReader::NameFilter filter = {}; + /// Current file inside the archive. + std::string path_in_archive = {}; + /// Read keys of files inside archives. + ObjectInfos * read_keys; + /// Object pointing to archive (NOT path within archive). + ObjectInfoPtr archive_object; + /// Reader of the archive. + std::shared_ptr archive_reader; + /// File enumerator inside the archive. + std::unique_ptr file_enumerator; + + std::mutex next_mutex; +}; + +} diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp new file mode 100644 index 00000000000..e49e14d2a0c --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -0,0 +1,76 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const StorageObjectStorage::QuerySettings & settings, + const String & key, + size_t sequence_number) +{ + if (settings.truncate_on_insert + || !object_storage.exists(StoredObject(key))) + return std::nullopt; + + if (settings.create_new_file_on_insert) + { + auto pos = key.find_first_of('.'); + String new_key; + do + { + new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); + ++sequence_number; + } + while (object_storage.exists(StoredObject(new_key))); + + return new_key; + } + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. 
" + "If you want to overwrite it, enable setting {}_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting {}_create_new_file_on_insert", + configuration.getNamespace(), key, configuration.getTypeName(), configuration.getTypeName()); +} + +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, + ObjectStoragePtr object_storage, + const StorageObjectStorage::ConfigurationPtr & configuration, + std::optional format_settings, + const ContextPtr & context) +{ + if (columns.empty()) + { + if (format == "auto") + std::tie(columns, format) = + StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + else + columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); + } + else if (format == "auto") + { + format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, context); + } + + if (!columns.hasOnlyOrdinary()) + { + /// We don't allow special columns. + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Special columns are not supported for {} storage" + "like MATERIALIZED, ALIAS or EPHEMERAL", configuration->getTypeName()); + } +} + +} diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h new file mode 100644 index 00000000000..2077999df41 --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.h @@ -0,0 +1,24 @@ +#pragma once +#include "StorageObjectStorage.h" + +namespace DB +{ + +class IObjectStorage; + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorage::Configuration & configuration, + const StorageObjectStorage::QuerySettings & settings, + const std::string & key, + size_t sequence_number); + +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, + ObjectStoragePtr object_storage, + const StorageObjectStorage::ConfigurationPtr & configuration, + std::optional format_settings, + const ContextPtr & context); + +} diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp new file mode 100644 index 00000000000..bf595b2f5d4 --- /dev/null +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +#if USE_AWS_S3 || USE_AZURE_BLOB_STORAGE || USE_HDFS + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +static std::shared_ptr createStorageObjectStorage( + const StorageFactory::Arguments & args, + StorageObjectStorage::ConfigurationPtr configuration, + ContextPtr context) +{ + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, context, false); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + if (args.storage_def->settings) + { + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. 
+ const auto & changes = context->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + } + + // Apply changes from SETTINGS clause, with validation. + user_format_settings.applyChanges(args.storage_def->settings->changes); + format_settings = getFormatSettings(context, user_format_settings); + } + else + { + format_settings = getFormatSettings(context); + } + + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return std::make_shared( + configuration, + configuration->createObjectStorage(context, /* is_readonly */false), + args.getContext(), + args.table_id, + args.columns, + args.constraints, + args.comment, + format_settings, + /* distributed_processing */ false, + partition_by); +} + +#endif + +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzure(StorageFactory & factory) +{ + factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::AZURE, + }); +} +#endif + +#if USE_AWS_S3 +void registerStorageS3Impl(const String & name, StorageFactory & factory) +{ + factory.registerStorage(name, [=](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +void registerStorageS3(StorageFactory & factory) +{ + registerStorageS3Impl("S3", factory); +} + +void registerStorageCOS(StorageFactory & factory) +{ + registerStorageS3Impl("COSN", factory); +} + +void registerStorageOSS(StorageFactory & factory) +{ + registerStorageS3Impl("OSS", factory); +} + +#endif + +#if USE_HDFS +void registerStorageHDFS(StorageFactory & factory) +{ + factory.registerStorage("HDFS", [=](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::HDFS, + }); +} +#endif + +void registerStorageObjectStorage(StorageFactory & factory) +{ +#if USE_AWS_S3 + registerStorageS3(factory); + registerStorageCOS(factory); + registerStorageOSS(factory); +#endif +#if USE_AZURE_BLOB_STORAGE + registerStorageAzure(factory); +#endif +#if USE_HDFS + registerStorageHDFS(factory); +#endif + UNUSED(factory); +} + +} diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 69940990569..09b009b26d8 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -1,3 +1,5 @@ +// NOLINTBEGIN(clang-analyzer-optin.core.EnumCastOutOfRange) + #include "PartitionedSink.h" #include @@ -145,3 +147,5 @@ String PartitionedSink::replaceWildcards(const String & haystack, const String & } } + +// NOLINTEND(clang-analyzer-optin.core.EnumCastOutOfRange) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h 
b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index 972c03e50d8..331c55cdacf 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -110,7 +110,7 @@ private: static void insertDefaultValue(StorageData & storage_data, size_t column_idx); void insertValue(StorageData & storage_data, const std::string & value, size_t column_idx); - enum class PostgreSQLQuery + enum class PostgreSQLQuery : uint8_t { INSERT, UPDATE, diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 30af80d6d85..75a97697e00 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -22,7 +22,7 @@ using StorageMetadataPtr = std::shared_ptr; /// Description of projections for Storage struct ProjectionDescription { - enum class Type + enum class Type : uint8_t { Normal, Aggregate, diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp index 28dc239ae37..d29194c888d 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp @@ -94,17 +94,31 @@ void RabbitMQConsumer::subscribe() bool RabbitMQConsumer::ackMessages(const CommitInfo & commit_info) { if (state != State::OK) + { + LOG_TEST(log, "State is {}, will not ack messages", magic_enum::enum_name(state.load(std::memory_order_relaxed))); return false; - - /// Nothing to ack. - if (!commit_info.delivery_tag) - return false; + } /// Do not send ack to server if message's channel is not the same as /// current running channel because delivery tags are scoped per channel, /// so if channel fails, all previous delivery tags become invalid. if (commit_info.channel_id != channel_id) + { + LOG_TEST(log, "Channel ID changed {} -> {}, will not ack messages", commit_info.channel_id, channel_id); return false; + } + + for (const auto & delivery_tag : commit_info.failed_delivery_tags) + { + if (consumer_channel->reject(delivery_tag)) + LOG_TRACE( + log, "Consumer rejected message with deliveryTag {} on channel {}", + delivery_tag, channel_id); + else + LOG_WARNING( + log, "Failed to reject message with deliveryTag {} on channel {}", + delivery_tag, channel_id); + } /// Duplicate ack? if (commit_info.delivery_tag > last_commited_delivery_tag @@ -119,11 +133,14 @@ bool RabbitMQConsumer::ackMessages(const CommitInfo & commit_info) return true; } - LOG_ERROR( - log, - "Did not commit messages for {}:{}, (current commit point {}:{})", - commit_info.channel_id, commit_info.delivery_tag, - channel_id, last_commited_delivery_tag); + if (commit_info.delivery_tag) + { + LOG_ERROR( + log, + "Did not commit messages for {}:{}, (current commit point {}:{})", + commit_info.channel_id, commit_info.delivery_tag, + channel_id, last_commited_delivery_tag); + } return false; } @@ -131,11 +148,18 @@ bool RabbitMQConsumer::ackMessages(const CommitInfo & commit_info) bool RabbitMQConsumer::nackMessages(const CommitInfo & commit_info) { if (state != State::OK) + { + LOG_TEST(log, "State is {}, will not nack messages", magic_enum::enum_name(state.load(std::memory_order_relaxed))); return false; + } /// Nothing to nack. 
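// Illustrative sketch (not part of this patch): the commit protocol that the reworked
// RabbitMQConsumer::ackMessages above implements, with the AMQP channel replaced by a
// stand-in. Failed delivery tags are rejected one by one, everything up to
// commit_info.delivery_tag is acknowledged with a single multi-ack, and tags not above
// the last committed one are treated as duplicates and skipped.
#include <cstdint>
#include <string>
#include <vector>

struct ChannelStandIn
{
    bool reject(uint64_t /*delivery_tag*/) { return true; }
    bool ack(uint64_t /*delivery_tag*/, bool /*multiple*/) { return true; }
};

struct CommitInfoSketch
{
    uint64_t delivery_tag = 0;
    std::string channel_id;
    std::vector<uint64_t> failed_delivery_tags;
};

bool ackMessagesSketch(ChannelStandIn & channel, const CommitInfoSketch & commit_info,
                       const std::string & current_channel_id, uint64_t & last_committed_delivery_tag)
{
    // Delivery tags are scoped per channel: after a channel change they are meaningless.
    if (commit_info.channel_id != current_channel_id)
        return false;

    // Reject broken messages individually so the broker can dead-letter or redeliver them.
    for (uint64_t failed_tag : commit_info.failed_delivery_tags)
        channel.reject(failed_tag);

    // Multi-ack everything up to delivery_tag, unless that would be a duplicate ack.
    if (commit_info.delivery_tag > last_committed_delivery_tag
        && channel.ack(commit_info.delivery_tag, /*multiple=*/ true))
    {
        last_committed_delivery_tag = commit_info.delivery_tag;
        return true;
    }
    return false;
}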
if (!commit_info.delivery_tag || commit_info.delivery_tag <= last_commited_delivery_tag) + { + LOG_TEST(log, "Delivery tag is {}, last committed delivery tag: {}, Will not nack messages", + commit_info.delivery_tag, last_commited_delivery_tag); return false; + } if (consumer_channel->reject(commit_info.delivery_tag, AMQP::multiple)) { @@ -187,8 +211,14 @@ void RabbitMQConsumer::updateChannel(RabbitMQConnection & connection) consumer_channel->onError([&](const char * message) { - LOG_ERROR(log, "Channel {} in in error state: {}", channel_id, message); - state = State::ERROR; + LOG_ERROR( + log, "Channel {} received an error: {} (usable: {}, connected: {})", + channel_id, message, consumer_channel->usable(), consumer_channel->connected()); + + if (!consumer_channel->usable() || !consumer_channel->connected()) + { + state = State::ERROR; + } }); } diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.h b/src/Storages/RabbitMQ/RabbitMQConsumer.h index 9dad175dda3..d319fb8830c 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.h @@ -39,6 +39,7 @@ public: { UInt64 delivery_tag = 0; String channel_id; + std::vector failed_delivery_tags; }; struct MessageData @@ -97,7 +98,7 @@ private: String channel_id; UInt64 channel_id_counter = 0; - enum class State + enum class State : uint8_t { NONE, INITIALIZING, @@ -110,7 +111,7 @@ private: ConcurrentBoundedQueue received; MessageData current; - UInt64 last_commited_delivery_tag; + UInt64 last_commited_delivery_tag = 0; std::condition_variable cv; std::mutex mutex; diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 36e092925fd..f2c0dae21c6 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -32,6 +32,7 @@ namespace DB M(Bool, rabbitmq_queue_consume, false, "Use user-defined queues and do not make any RabbitMQ setup: declaring exchanges, queues, bindings", 0) \ M(String, rabbitmq_username, "", "RabbitMQ username", 0) \ M(String, rabbitmq_password, "", "RabbitMQ password", 0) \ + M(Bool, reject_unhandled_messages, false, "Allow messages to be rejected in case they cannot be processed. This also automatically implies if there is a x-deadletter-exchange queue setting added", 0) \ M(Bool, rabbitmq_commit_on_select, false, "Commit messages when select query is made", 0) \ M(UInt64, rabbitmq_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ M(StreamingHandleErrorMode, rabbitmq_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for RabbitMQ engine. 
Possible values: default (throw an exception after rabbitmq_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 09c1bf1b2e7..15d013245d3 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -46,7 +46,9 @@ RabbitMQSource::RabbitMQSource( size_t max_block_size_, UInt64 max_execution_time_, StreamingHandleErrorMode handle_error_mode_, - bool ack_in_suffix_) + bool nack_broken_messages_, + bool ack_in_suffix_, + LoggerPtr log_) : RabbitMQSource( storage_, storage_snapshot_, @@ -56,7 +58,9 @@ RabbitMQSource::RabbitMQSource( max_block_size_, max_execution_time_, handle_error_mode_, - ack_in_suffix_) + nack_broken_messages_, + ack_in_suffix_, + log_) { } @@ -69,7 +73,9 @@ RabbitMQSource::RabbitMQSource( size_t max_block_size_, UInt64 max_execution_time_, StreamingHandleErrorMode handle_error_mode_, - bool ack_in_suffix_) + bool nack_broken_messages_, + bool ack_in_suffix_, + LoggerPtr log_) : ISource(getSampleBlock(headers.first, headers.second)) , storage(storage_) , storage_snapshot(storage_snapshot_) @@ -78,9 +84,10 @@ RabbitMQSource::RabbitMQSource( , max_block_size(max_block_size_) , handle_error_mode(handle_error_mode_) , ack_in_suffix(ack_in_suffix_) + , nack_broken_messages(nack_broken_messages_) , non_virtual_header(std::move(headers.first)) , virtual_header(std::move(headers.second)) - , log(getLogger("RabbitMQSource")) + , log(log_) , max_execution_time_ms(max_execution_time_) { storage.incrementReader(); @@ -119,7 +126,10 @@ Chunk RabbitMQSource::generate() { auto chunk = generateImpl(); if (!chunk && ack_in_suffix) + { + LOG_TEST(log, "Will send ack on select"); sendAck(); + } return chunk; } @@ -178,7 +188,7 @@ Chunk RabbitMQSource::generateImpl() StreamingFormatExecutor executor(non_virtual_header, input_format, on_error); - RabbitMQConsumer::CommitInfo current_commit_info; + /// Channel id will not change during read. while (true) { exception_message.reset(); @@ -186,8 +196,11 @@ Chunk RabbitMQSource::generateImpl() if (consumer->hasPendingMessages()) { + /// A buffer containing a single RabbitMQ message. if (auto buf = consumer->consume()) + { new_rows = executor.execute(*buf); + } } if (new_rows) @@ -195,6 +208,24 @@ Chunk RabbitMQSource::generateImpl() const auto exchange_name = storage.getExchange(); const auto & message = consumer->currentMessage(); + LOG_TEST(log, "Pulled {} rows, message delivery tag: {}, " + "previous delivery tag: {}, redelivered: {}, failed delivery tags by this moment: {}, exception message: {}", + new_rows, message.delivery_tag, commit_info.delivery_tag, message.redelivered, + commit_info.failed_delivery_tags.size(), + exception_message.has_value() ? 
exception_message.value() : "None"); + + commit_info.channel_id = message.channel_id; + + if (exception_message.has_value() && nack_broken_messages) + { + commit_info.failed_delivery_tags.push_back(message.delivery_tag); + } + else + { + chassert(!commit_info.delivery_tag || message.redelivered || commit_info.delivery_tag < message.delivery_tag); + commit_info.delivery_tag = std::max(commit_info.delivery_tag, message.delivery_tag); + } + for (size_t i = 0; i < new_rows; ++i) { virtual_columns[0]->insert(exchange_name); @@ -219,7 +250,6 @@ Chunk RabbitMQSource::generateImpl() } total_rows += new_rows; - current_commit_info = {message.delivery_tag, message.channel_id}; } else if (total_rows == 0) { @@ -261,7 +291,6 @@ Chunk RabbitMQSource::generateImpl() for (auto & column : virtual_columns) result_columns.push_back(std::move(column)); - commit_info = current_commit_info; return Chunk(std::move(result_columns), total_rows); } diff --git a/src/Storages/RabbitMQ/RabbitMQSource.h b/src/Storages/RabbitMQ/RabbitMQSource.h index 0d6fad97054..54a9f52de6d 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.h +++ b/src/Storages/RabbitMQ/RabbitMQSource.h @@ -20,12 +20,15 @@ public: size_t max_block_size_, UInt64 max_execution_time_, StreamingHandleErrorMode handle_error_mode_, - bool ack_in_suffix = false); + bool nack_broken_messages_, + bool ack_in_suffix, + LoggerPtr log_); ~RabbitMQSource() override; String getName() const override { return storage.getName(); } void updateChannel(RabbitMQConnection & connection) { consumer->updateChannel(connection); } + String getChannelID() const { return consumer->getChannelID(); } Chunk generate() override; @@ -39,10 +42,11 @@ private: StorageRabbitMQ & storage; StorageSnapshotPtr storage_snapshot; ContextPtr context; - Names column_names; + const Names column_names; const size_t max_block_size; - StreamingHandleErrorMode handle_error_mode; - bool ack_in_suffix; + const StreamingHandleErrorMode handle_error_mode; + const bool ack_in_suffix; + const bool nack_broken_messages; bool is_finished = false; const Block non_virtual_header; @@ -65,7 +69,9 @@ private: size_t max_block_size_, UInt64 max_execution_time_, StreamingHandleErrorMode handle_error_mode_, - bool ack_in_suffix); + bool nack_broken_messages_, + bool ack_in_suffix, + LoggerPtr log_); Chunk generateImpl(); }; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index b882fd2728c..e4b19992151 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -64,6 +64,7 @@ namespace ExchangeType static const String HEADERS = "headers"; } +static const auto deadletter_exchange_setting = "x-dead-letter-exchange"; StorageRabbitMQ::StorageRabbitMQ( const StorageID & table_id_, @@ -84,15 +85,20 @@ StorageRabbitMQ::StorageRabbitMQ( , queue_base(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_base)) , queue_settings_list(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_settings_list))) , max_rows_per_message(rabbitmq_settings->rabbitmq_max_rows_per_message) + , log(getLogger("StorageRabbitMQ (" + table_id_.table_name + ")")) , persistent(rabbitmq_settings->rabbitmq_persistent.value) , use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) - , log(getLogger("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , unique_strbase(getRandomName()) , 
queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , milliseconds_to_wait(rabbitmq_settings->rabbitmq_empty_queue_backoff_start_ms) { + reject_unhandled_messages = rabbitmq_settings->reject_unhandled_messages + || queue_settings_list.end() != + std::find_if(queue_settings_list.begin(), queue_settings_list.end(), + [](const String & name) { return name.starts_with(deadletter_exchange_setting); }); + const auto & config = getContext()->getConfigRef(); std::pair parsed_address; @@ -402,9 +408,7 @@ void StorageRabbitMQ::initRabbitMQ() /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers - initExchange(*rabbit_channel); bindExchange(*rabbit_channel); - for (const auto i : collections::range(0, num_queues)) bindQueue(i + 1, *rabbit_channel); @@ -436,7 +440,7 @@ void StorageRabbitMQ::initRabbitMQ() } -void StorageRabbitMQ::initExchange(AMQP::TcpChannel & rabbit_channel) +void StorageRabbitMQ::bindExchange(AMQP::TcpChannel & rabbit_channel) { /// Exchange hierarchy: /// 1. Main exchange (defined with table settings - rabbitmq_exchange_name, rabbitmq_exchange_type). @@ -449,68 +453,78 @@ void StorageRabbitMQ::initExchange(AMQP::TcpChannel & rabbit_channel) /// 1. `durable` (survive RabbitMQ server restart) /// 2. `autodelete` (auto delete in case of queue bindings are dropped). + std::string error; + int error_code; rabbit_channel.declareExchange(exchange_name, exchange_type, AMQP::durable) .onError([&](const char * message) { + connection->getHandler().stopLoop(); /// This error can be a result of attempt to declare exchange if it was already declared but /// 1) with different exchange type. /// 2) with different exchange settings. - throw Exception(ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, - "Unable to declare exchange. Make sure specified exchange is not already declared. Error: {}", - std::string(message)); + error = "Unable to declare exchange. " + "Make sure specified exchange is not already declared. Error: " + std::string(message); + error_code = ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE; }); rabbit_channel.declareExchange(bridge_exchange, AMQP::fanout, AMQP::durable | AMQP::autodelete) .onError([&](const char * message) { + connection->getHandler().stopLoop(); /// This error is not supposed to happen as this exchange name is always unique to type and its settings. - throw Exception( - ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, - "Unable to declare bridge exchange ({}). Reason: {}", bridge_exchange, std::string(message)); + if (error.empty()) + { + error = fmt::format("Unable to declare bridge exchange ({}). Reason: {}", + bridge_exchange, std::string(message)); + error_code = ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE; + } }); - if (!hash_exchange) + if (hash_exchange) + { + AMQP::Table binding_arguments; + + /// Default routing key property in case of hash exchange is a routing key, which is required to be an integer. + /// Support for arbitrary exchange type (i.e. arbitrary pattern of routing keys) requires to eliminate this dependency. + /// This settings changes hash property to message_id. + binding_arguments["hash-property"] = "message_id"; + + /// Declare hash exchange for sharding. + rabbit_channel.declareExchange(sharding_exchange, AMQP::consistent_hash, AMQP::durable | AMQP::autodelete, binding_arguments) + .onError([&](const char * message) + { + connection->getHandler().stopLoop(); + /// This error can be a result of same reasons as above for exchange_name, i.e. 
it will mean that sharding exchange name appeared + /// to be the same as some other exchange (which purpose is not for sharding). So probably actual error reason: queue_base parameter + /// is bad. + if (error.empty()) + { + error = fmt::format("Unable to declare sharding exchange ({}). Reason: {}", + sharding_exchange, std::string(message)); + error_code = ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE; + } + }); + + rabbit_channel.bindExchange(bridge_exchange, sharding_exchange, routing_keys[0]) + .onError([&](const char * message) + { + connection->getHandler().stopLoop(); + if (error.empty()) + { + error = fmt::format( + "Unable to bind bridge exchange ({}) to sharding exchange ({}). Reason: {}", + bridge_exchange, sharding_exchange, std::string(message)); + error_code = ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE; + } + }); + + consumer_exchange = sharding_exchange; + } + else { consumer_exchange = bridge_exchange; - return; } - AMQP::Table binding_arguments; - - /// Default routing key property in case of hash exchange is a routing key, which is required to be an integer. - /// Support for arbitrary exchange type (i.e. arbitrary pattern of routing keys) requires to eliminate this dependency. - /// This settings changes hash property to message_id. - binding_arguments["hash-property"] = "message_id"; - - /// Declare hash exchange for sharding. - rabbit_channel.declareExchange(sharding_exchange, AMQP::consistent_hash, AMQP::durable | AMQP::autodelete, binding_arguments) - .onError([&](const char * message) - { - /// This error can be a result of same reasons as above for exchange_name, i.e. it will mean that sharding exchange name appeared - /// to be the same as some other exchange (which purpose is not for sharding). So probably actual error reason: queue_base parameter - /// is bad. - throw Exception( - ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, - "Unable to declare sharding exchange ({}). Reason: {}", sharding_exchange, std::string(message)); - }); - - rabbit_channel.bindExchange(bridge_exchange, sharding_exchange, routing_keys[0]) - .onError([&](const char * message) - { - throw Exception( - ErrorCodes::CANNOT_BIND_RABBITMQ_EXCHANGE, - "Unable to bind bridge exchange ({}) to sharding exchange ({}). Reason: {}", - bridge_exchange, - sharding_exchange, - std::string(message)); - }); - - consumer_exchange = sharding_exchange; -} - - -void StorageRabbitMQ::bindExchange(AMQP::TcpChannel & rabbit_channel) -{ size_t bound_keys = 0; if (exchange_type == AMQP::ExchangeType::headers) @@ -527,10 +541,10 @@ void StorageRabbitMQ::bindExchange(AMQP::TcpChannel & rabbit_channel) .onSuccess([&]() { connection->getHandler().stopLoop(); }) .onError([&](const char * message) { - throw Exception( - ErrorCodes::CANNOT_BIND_RABBITMQ_EXCHANGE, - "Unable to bind exchange {} to bridge exchange ({}). Reason: {}", - exchange_name, bridge_exchange, std::string(message)); + connection->getHandler().stopLoop(); + error = fmt::format("Unable to bind exchange {} to bridge exchange ({}). 
Reason: {}", + exchange_name, bridge_exchange, std::string(message)); + error_code = ErrorCodes::CANNOT_BIND_RABBITMQ_EXCHANGE; }); } else if (exchange_type == AMQP::ExchangeType::fanout || exchange_type == AMQP::ExchangeType::consistent_hash) @@ -539,10 +553,13 @@ void StorageRabbitMQ::bindExchange(AMQP::TcpChannel & rabbit_channel) .onSuccess([&]() { connection->getHandler().stopLoop(); }) .onError([&](const char * message) { - throw Exception( - ErrorCodes::CANNOT_BIND_RABBITMQ_EXCHANGE, - "Unable to bind exchange {} to bridge exchange ({}). Reason: {}", - exchange_name, bridge_exchange, std::string(message)); + connection->getHandler().stopLoop(); + if (error.empty()) + { + error = fmt::format("Unable to bind exchange {} to bridge exchange ({}). Reason: {}", + exchange_name, bridge_exchange, std::string(message)); + error_code = ErrorCodes::CANNOT_BIND_RABBITMQ_EXCHANGE; + } }); } else @@ -558,20 +575,26 @@ void StorageRabbitMQ::bindExchange(AMQP::TcpChannel & rabbit_channel) }) .onError([&](const char * message) { - throw Exception( - ErrorCodes::CANNOT_BIND_RABBITMQ_EXCHANGE, - "Unable to bind exchange {} to bridge exchange ({}). Reason: {}", - exchange_name, bridge_exchange, std::string(message)); + connection->getHandler().stopLoop(); + if (error.empty()) + { + error = fmt::format("Unable to bind exchange {} to bridge exchange ({}). Reason: {}", + exchange_name, bridge_exchange, std::string(message)); + error_code = ErrorCodes::CANNOT_BIND_RABBITMQ_EXCHANGE; + } }); } } connection->getHandler().startBlockingLoop(); + if (!error.empty()) + throw Exception(error_code, "{}", error); } void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_channel) { + std::string error; auto success_callback = [&](const std::string & queue_name, int msgcount, int /* consumercount */) { queues.emplace_back(queue_name); @@ -588,23 +611,26 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_chann .onSuccess([&] { connection->getHandler().stopLoop(); }) .onError([&](const char * message) { - throw Exception( - ErrorCodes::CANNOT_CREATE_RABBITMQ_QUEUE_BINDING, - "Failed to create queue binding for exchange {}. Reason: {}", exchange_name, std::string(message)); + connection->getHandler().stopLoop(); + error = fmt::format("Failed to create queue binding for exchange {}. Reason: {}", + exchange_name, std::string(message)); }); }; auto error_callback([&](const char * message) { + connection->getHandler().stopLoop(); /* This error is most likely a result of an attempt to declare queue with different settings if it was declared before. So for a * given queue name either deadletter_exchange parameter changed or queue_size changed, i.e. table was declared with different * max_block_size parameter. Solution: client should specify a different queue_base parameter or manually delete previously * declared queues via any of the various cli tools. */ - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to declare queue. Probably queue settings are conflicting: " - "max_block_size, deadletter_exchange. Attempt specifying differently those settings " - "or use a different queue_base or manually delete previously declared queues, " - "which were declared with the same names. ERROR reason: {}", std::string(message)); + if (error.empty()) + error = fmt::format( + "Failed to declare queue. Probably queue settings are conflicting: " + "max_block_size, deadletter_exchange. 
Attempt specifying differently those settings " + "or use a different queue_base or manually delete previously declared queues, " + "which were declared with the same names. ERROR reason: {}", std::string(message)); }); AMQP::Table queue_settings; @@ -642,6 +668,8 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_chann /// and deleting queues should not take place. rabbit_channel.declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); connection->getHandler().startBlockingLoop(); + if (!error.empty()) + throw Exception(ErrorCodes::CANNOT_CREATE_RABBITMQ_QUEUE_BINDING, "{}", error); } @@ -665,6 +693,7 @@ void StorageRabbitMQ::unbindExchange() stopLoop(); looping_task->deactivate(); + std::string error; auto rabbit_channel = connection->createChannel(); rabbit_channel->removeExchange(bridge_exchange) @@ -674,11 +703,14 @@ void StorageRabbitMQ::unbindExchange() }) .onError([&](const char * message) { - throw Exception(ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE, "Unable to remove exchange. Reason: {}", std::string(message)); + connection->getHandler().stopLoop(); + error = fmt::format("Unable to remove exchange. Reason: {}", std::string(message)); }); connection->getHandler().startBlockingLoop(); rabbit_channel->close(); + if (!error.empty()) + throw Exception(ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE, "{}", error); } catch (...) { @@ -739,8 +771,9 @@ void StorageRabbitMQ::read( for (size_t i = 0; i < num_created_consumers; ++i) { auto rabbit_source = std::make_shared( - *this, storage_snapshot, modified_context, column_names, 1, - max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, rabbitmq_settings->rabbitmq_commit_on_select); + *this, storage_snapshot, modified_context, column_names, /* max_block_size */1, + max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, reject_unhandled_messages, + /* ack_in_suffix */rabbitmq_settings->rabbitmq_commit_on_select, log); auto converting_dag = ActionsDAG::makeConvertingActions( rabbit_source->getPort().getHeader().getColumnsWithTypeAndName(), @@ -975,7 +1008,7 @@ void StorageRabbitMQ::streamingToViewsFunc() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + LOG_ERROR(log, "Error while streaming to views: {}", getCurrentExceptionMessage(true)); } mv_attached.store(false); @@ -1076,7 +1109,8 @@ bool StorageRabbitMQ::tryStreamToViews() { auto source = std::make_shared( *this, storage_snapshot, rabbitmq_context, Names{}, block_size, - max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode); + max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, + reject_unhandled_messages, /* ack_in_suffix */false, log); sources.emplace_back(source); pipes.emplace_back(source); @@ -1129,7 +1163,10 @@ bool StorageRabbitMQ::tryStreamToViews() if (!connection->isConnected()) { if (shutdown_called) + { + LOG_DEBUG(log, "Shutdown called, quitting"); return false; + } if (connection->reconnect()) { @@ -1145,6 +1182,8 @@ bool StorageRabbitMQ::tryStreamToViews() } else { + LOG_TEST(log, "Will {} messages for {} channels", write_failed ? "nack" : "ack", sources.size()); + /// Commit for (auto & source : sources) { @@ -1152,36 +1191,41 @@ bool StorageRabbitMQ::tryStreamToViews() ++queue_empty; if (source->needChannelUpdate()) - source->updateChannel(*connection); - - /* false is returned by the sendAck function in only two cases: - * 1) if connection failed. 
In this case all channels will be closed and will be unable to send ack. Also ack is made based on - * delivery tags, which are unique to channels, so if channels fail, those delivery tags will become invalid and there is - * no way to send specific ack from a different channel. Actually once the server realises that it has messages in a queue - * waiting for confirm from a channel which suddenly closed, it will immediately make those messages accessible to other - * consumers. So in this case duplicates are inevitable. - * 2) size of the sent frame (libraries's internal request interface) exceeds max frame - internal library error. This is more - * common for message frames, but not likely to happen to ack frame I suppose. So I do not believe it is likely to happen. - * Also in this case if channel didn't get closed - it is ok if failed to send ack, because the next attempt to send ack on - * the same channel will also commit all previously not-committed messages. Anyway I do not think that for ack frame this - * will ever happen. - */ - if (write_failed ? source->sendNack() : source->sendAck()) { - /// Iterate loop to activate error callbacks if they happened - connection->getHandler().iterateLoop(); - if (!connection->isConnected()) - break; + LOG_TEST(log, "Channel {} is in error state, will update", source->getChannelID()); + source->updateChannel(*connection); } + else + { + /* false is returned by the sendAck function in only two cases: + * 1) if connection failed. In this case all channels will be closed and will be unable to send ack. Also ack is made based on + * delivery tags, which are unique to channels, so if channels fail, those delivery tags will become invalid and there is + * no way to send specific ack from a different channel. Actually once the server realises that it has messages in a queue + * waiting for confirm from a channel which suddenly closed, it will immediately make those messages accessible to other + * consumers. So in this case duplicates are inevitable. + * 2) size of the sent frame (libraries's internal request interface) exceeds max frame - internal library error. This is more + * common for message frames, but not likely to happen to ack frame I suppose. So I do not believe it is likely to happen. + * Also in this case if channel didn't get closed - it is ok if failed to send ack, because the next attempt to send ack on + * the same channel will also commit all previously not-committed messages. Anyway I do not think that for ack frame this + * will ever happen. + */ + if (write_failed ? source->sendNack() : source->sendAck()) + { + /// Iterate loop to activate error callbacks if they happened + connection->getHandler().iterateLoop(); + if (!connection->isConnected()) + break; + } - connection->getHandler().iterateLoop(); + connection->getHandler().iterateLoop(); + } } } if (write_failed) { LOG_TRACE(log, "Write failed, reschedule"); - return false; + return true; } if (!hasDependencies(getStorageID())) @@ -1200,10 +1244,11 @@ bool StorageRabbitMQ::tryStreamToViews() } else { + LOG_TEST(log, "Will start background loop to let messages be pushed to channel"); startLoop(); } - /// Do not reschedule, do not stop event loop. + /// Reschedule. 
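// Illustrative sketch (not part of this patch): the error-handling pattern the
// bindExchange/bindQueue refactor above moves to. AMQP callbacks should not throw
// across the event loop, so they stop the loop and record only the first error; the
// caller throws after the blocking loop has returned. Handler and Channel below are
// simplified stand-ins for the AMQP-CPP types used in the real code, and the
// "declaration" here fails immediately just to exercise the path.
#include <functional>
#include <stdexcept>
#include <string>

struct Handler
{
    void stopLoop() {}            // ask the event loop to exit
    void startBlockingLoop() {}   // run the loop until stopLoop() is called
};

struct Channel
{
    void declareExchange(const std::string & name, const std::function<void(const char *)> & on_error)
    {
        on_error(("exchange " + name + " already declared with different settings").c_str());
    }
};

void declareOrThrow(Handler & handler, Channel & channel, const std::string & exchange)
{
    std::string error;
    channel.declareExchange(exchange, [&](const char * message)
    {
        handler.stopLoop();       // never throw from inside the event loop
        if (error.empty())        // keep only the first error
            error = std::string("Unable to declare exchange. Error: ") + message;
    });

    handler.startBlockingLoop();  // wait until success or an error callback stopped the loop
    if (!error.empty())
        throw std::runtime_error(error);   // safe to throw here, outside the loop
}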
return true; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index e14741d9636..b8fab5825e4 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -91,6 +91,9 @@ private: String queue_base; Names queue_settings_list; size_t max_rows_per_message; + bool reject_unhandled_messages = false; + + LoggerPtr log; /// For insert query. Mark messages as durable. const bool persistent; @@ -101,7 +104,6 @@ private: bool use_user_setup; bool hash_exchange; - LoggerPtr log; RabbitMQConnectionPtr connection; /// Connection for all consumers RabbitMQConfiguration configuration; @@ -181,7 +183,6 @@ private: void initRabbitMQ(); void cleanupRabbitMQ() const; - void initExchange(AMQP::TcpChannel & rabbit_channel); void bindExchange(AMQP::TcpChannel & rabbit_channel); void bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_channel); diff --git a/src/Storages/RedisCommon.h b/src/Storages/RedisCommon.h index 4cc358e6536..a94e1245d73 100644 --- a/src/Storages/RedisCommon.h +++ b/src/Storages/RedisCommon.h @@ -16,7 +16,7 @@ namespace DB static constexpr size_t REDIS_MAX_BLOCK_SIZE = DEFAULT_BLOCK_SIZE; static constexpr size_t REDIS_LOCK_ACQUIRE_TIMEOUT_MS = 5000; -enum class RedisStorageType +enum class RedisStorageType : uint8_t { SIMPLE, HASH_MAP, diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp new file mode 100644 index 00000000000..0baa234e7a3 --- /dev/null +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -0,0 +1,243 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ROCKSDB_ERROR; +} + +static const IColumn::Permutation & getAscendingPermutation(const IColumn & column, IColumn::Permutation & perm) +{ + column.getPermutation(IColumn::PermutationSortDirection::Ascending, IColumn::PermutationSortStability::Stable, 0, 1, perm); + return perm; +} + +/// Build SST file from key-value pairs +static rocksdb::Status buildSSTFile(const String & path, const ColumnString & keys, const ColumnString & values, const std::optional & perm_ = {}) +{ + /// rocksdb::SstFileWriter requires keys to be sorted in ascending order + IColumn::Permutation calculated_perm; + const IColumn::Permutation & perm = perm_ ? 
*perm_ : getAscendingPermutation(keys, calculated_perm); + + rocksdb::SstFileWriter sst_file_writer(rocksdb::EnvOptions{}, rocksdb::Options{}); + auto status = sst_file_writer.Open(path); + if (!status.ok()) + return status; + + auto rows = perm.size(); + for (size_t idx = 0; idx < rows;) + { + /// We will write the last row of the same key + size_t next_idx = idx + 1; + while (next_idx < rows && keys.compareAt(perm[idx], perm[next_idx], keys, 1) == 0) + ++next_idx; + + auto row = perm[next_idx - 1]; + status = sst_file_writer.Put(keys.getDataAt(row).toView(), values.getDataAt(row).toView()); + if (!status.ok()) + return status; + + idx = next_idx; + } + + return sst_file_writer.Finish(); +} + +EmbeddedRocksDBBulkSink::EmbeddedRocksDBBulkSink( + ContextPtr context_, StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_) + : SinkToStorage(metadata_snapshot_->getSampleBlock()), WithContext(context_), storage(storage_), metadata_snapshot(metadata_snapshot_) +{ + for (const auto & elem : getHeader()) + { + if (elem.name == storage.primary_key) + break; + ++primary_key_pos; + } + + serializations = getHeader().getSerializations(); + min_block_size_rows = std::max(storage.getSettings().bulk_insert_block_size, getContext()->getSettingsRef().min_insert_block_size_rows); + + /// If max_insert_threads > 1 we may have multiple EmbeddedRocksDBBulkSink and getContext()->getCurrentQueryId() is not guarantee to + /// to have a distinct path. Also we cannot use query id as directory name here, because it could be defined by user and not suitable + /// for directory name + auto base_directory_name = TMP_INSERT_PREFIX + sipHash128String(getContext()->getCurrentQueryId()); + insert_directory_queue = fs::path(storage.getDataPaths()[0]) / (base_directory_name + "-" + getRandomASCIIString(8)); + fs::create_directory(insert_directory_queue); +} + +EmbeddedRocksDBBulkSink::~EmbeddedRocksDBBulkSink() +{ + try + { + if (fs::exists(insert_directory_queue)) + (void)fs::remove_all(insert_directory_queue); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Error while removing temporary directory {}:", insert_directory_queue)); + } +} + +std::vector EmbeddedRocksDBBulkSink::squash(Chunk chunk) +{ + /// End of input stream + if (chunk.getNumRows() == 0) + { + return std::move(chunks); + } + + /// Just read block is already enough. + if (isEnoughSize(chunk)) + { + /// If no accumulated data, return just read block. + if (chunks.empty()) + { + chunks.emplace_back(std::move(chunk)); + return {}; + } + + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + std::vector to_return; + std::swap(to_return, chunks); + chunks.emplace_back(std::move(chunk)); + return to_return; + } + + /// Accumulated block is already enough. + if (isEnoughSize(chunks)) + { + /// Return accumulated data and place new block to accumulated data. + std::vector to_return; + std::swap(to_return, chunks); + chunks.emplace_back(std::move(chunk)); + return to_return; + } + + chunks.emplace_back(std::move(chunk)); + if (isEnoughSize(chunks)) + { + std::vector to_return; + std::swap(to_return, chunks); + return to_return; + } + + /// Squashed block is not ready. 
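// Illustrative sketch (not part of this patch): the squashing policy used by
// EmbeddedRocksDBBulkSink::squash above, reduced to plain row counting. Incoming
// blocks are accumulated until they reach min_block_size_rows and are then flushed
// as one batch, so each batch produces one reasonably sized SST file. The real
// method also handles the case where a single incoming chunk is already big enough.
#include <cstddef>
#include <vector>

struct SquasherSketch
{
    size_t min_block_size_rows = 0;
    std::vector<size_t> pending_row_counts;   // stand-in for the accumulated Chunks

    /// Returns the batch to flush now, or an empty vector if more data should be accumulated.
    std::vector<size_t> add(size_t rows)
    {
        pending_row_counts.push_back(rows);
        size_t total = 0;
        for (size_t r : pending_row_counts)
            total += r;
        if (total < min_block_size_rows)
            return {};                        // squashed block is not ready yet
        std::vector<size_t> to_flush;
        std::swap(to_flush, pending_row_counts);
        return to_flush;
    }
};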
+ return {}; +} + +std::pair EmbeddedRocksDBBulkSink::serializeChunks(std::vector && input_chunks) const +{ + auto serialized_key_column = ColumnString::create(); + auto serialized_value_column = ColumnString::create(); + + { + auto & serialized_key_data = serialized_key_column->getChars(); + auto & serialized_key_offsets = serialized_key_column->getOffsets(); + auto & serialized_value_data = serialized_value_column->getChars(); + auto & serialized_value_offsets = serialized_value_column->getOffsets(); + WriteBufferFromVector writer_key(serialized_key_data); + WriteBufferFromVector writer_value(serialized_value_data); + + for (auto && chunk : input_chunks) + { + const auto & columns = chunk.getColumns(); + auto rows = chunk.getNumRows(); + for (size_t i = 0; i < rows; ++i) + { + for (size_t idx = 0; idx < columns.size(); ++idx) + serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, {}); + /// String in ColumnString must be null-terminated + writeChar('\0', writer_key); + writeChar('\0', writer_value); + serialized_key_offsets.emplace_back(writer_key.count()); + serialized_value_offsets.emplace_back(writer_value.count()); + } + } + + writer_key.finalize(); + writer_value.finalize(); + } + + return {std::move(serialized_key_column), std::move(serialized_value_column)}; +} + +void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) +{ + std::vector chunks_to_write = squash(std::move(chunk_)); + + if (chunks_to_write.empty()) + return; + + auto [serialized_key_column, serialized_value_column] = serializeChunks(std::move(chunks_to_write)); + auto sst_file_path = getTemporarySSTFilePath(); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path); + if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); + + /// Ingest the SST file + rocksdb::IngestExternalFileOptions ingest_options; + ingest_options.move_files = true; /// The temporary file is on the same disk, so move (or hardlink) file will be faster than copy + if (auto status = storage.rocksdb_ptr->IngestExternalFile({sst_file_path}, ingest_options); !status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); + + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "SST file {} has been ingested", sst_file_path); + if (fs::exists(sst_file_path)) + (void)fs::remove(sst_file_path); +} + +void EmbeddedRocksDBBulkSink::onFinish() +{ + /// If there is any data left, write it. 
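// Illustrative sketch (not part of this patch): the build-then-ingest flow the bulk
// sink above uses instead of writing through memtables. Keys must be fed to
// SstFileWriter in ascending order (hence the sorted std::map here, where the real
// code sorts with a column permutation); move_files lets RocksDB move or hard-link
// the file instead of copying it when it lives on the same disk.
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/sst_file_writer.h>
#include <map>
#include <string>

rocksdb::Status bulkLoad(rocksdb::DB & db, const std::map<std::string, std::string> & sorted_kv, const std::string & sst_path)
{
    rocksdb::SstFileWriter writer(rocksdb::EnvOptions{}, rocksdb::Options{});
    rocksdb::Status status = writer.Open(sst_path);
    if (!status.ok())
        return status;

    for (const auto & [key, value] : sorted_kv)   // std::map iterates keys in ascending order
    {
        status = writer.Put(key, value);
        if (!status.ok())
            return status;
    }

    status = writer.Finish();
    if (!status.ok())
        return status;

    rocksdb::IngestExternalFileOptions ingest_options;
    ingest_options.move_files = true;             // same disk: move/hardlink instead of copy
    return db.IngestExternalFile({sst_path}, ingest_options);
}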
+ if (!chunks.empty()) + consume({}); +} + +String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() +{ + return fs::path(insert_directory_queue) / (toString(file_counter++) + ".sst"); +} + +bool EmbeddedRocksDBBulkSink::isEnoughSize(const std::vector<Chunk> & input_chunks) const +{ + size_t total_rows = 0; + for (const auto & chunk : input_chunks) + total_rows += chunk.getNumRows(); + return total_rows >= min_block_size_rows; +} + +bool EmbeddedRocksDBBulkSink::isEnoughSize(const Chunk & chunk) const +{ + return chunk.getNumRows() >= min_block_size_rows; +} + +} diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h new file mode 100644 index 00000000000..46193b152ca --- /dev/null +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace fs = std::filesystem; + +class StorageEmbeddedRocksDB; +class EmbeddedRocksDBBulkSink; +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; + +/// Optimized for bulk importing into StorageEmbeddedRocksDB: +/// 1. No mem-table: an SST file is built from each chunk, then imported into rocksdb +/// 2. Squash chunks to reduce the number of SST files +class EmbeddedRocksDBBulkSink : public SinkToStorage, public WithContext +{ +public: + EmbeddedRocksDBBulkSink( + ContextPtr context_, + StorageEmbeddedRocksDB & storage_, + const StorageMetadataPtr & metadata_snapshot_); + + ~EmbeddedRocksDBBulkSink() override; + + void consume(Chunk chunk) override; + + void onFinish() override; + + String getName() const override { return "EmbeddedRocksDBBulkSink"; } + +private: + /// Get a unique path to write temporary SST file + String getTemporarySSTFilePath(); + + /// Squash chunks to a minimum size + std::vector<Chunk> squash(Chunk chunk); + bool isEnoughSize(const std::vector<Chunk> & input_chunks) const; + bool isEnoughSize(const Chunk & chunk) const; + /// Serialize chunks to rocksdb key-value pairs + std::pair<ColumnString::Ptr, ColumnString::Ptr> serializeChunks(std::vector<Chunk> && input_chunks) const; + + StorageEmbeddedRocksDB & storage; + StorageMetadataPtr metadata_snapshot; + size_t primary_key_pos = 0; + Serializations serializations; + + /// For squashing chunks + std::vector<Chunk> chunks; + size_t min_block_size_rows = 0; + + /// For writing SST files + size_t file_counter = 0; + static constexpr auto TMP_INSERT_PREFIX = "tmp_insert_"; + String insert_directory_queue; +}; + +} diff --git a/src/Storages/RocksDB/RocksDBSettings.cpp b/src/Storages/RocksDB/RocksDBSettings.cpp new file mode 100644 index 00000000000..7de2077eb47 --- /dev/null +++ b/src/Storages/RocksDB/RocksDBSettings.cpp @@ -0,0 +1,41 @@ +#include "RocksDBSettings.h" +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(RockDBSettingsTraits, LIST_OF_ROCKSDB_SETTINGS) + + +void RocksDBSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr /*context*/) +{ + if (storage_def.settings) + { + try + { + auto changes = storage_def.settings->changes; + applyChanges(changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } +} + +std::vector<String> RocksDBSettings::getAllRegisteredNames() const +{ + std::vector<String> all_settings; + for (const auto & setting_field : all()) + all_settings.push_back(setting_field.getName()); + return all_settings; +} +} diff --git
a/src/Storages/RocksDB/RocksDBSettings.h b/src/Storages/RocksDB/RocksDBSettings.h new file mode 100644 index 00000000000..1b168c56d89 --- /dev/null +++ b/src/Storages/RocksDB/RocksDBSettings.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace Poco::Util +{ +class AbstractConfiguration; +} + + +namespace DB +{ +class ASTStorage; +struct Settings; + + +/** StorageEmbeddedRocksdb table settings + */ + +#define ROCKSDB_SETTINGS(M, ALIAS) \ + M(Bool, optimize_for_bulk_insert, true, "Table is optimized for bulk insertions (insert pipeline will create SST files and import to rocksdb database instead of writing to memtables)", 0) \ + M(UInt64, bulk_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "Size of block for bulk insert, if it's smaller than query setting min_insert_block_size_rows then it will be overridden by min_insert_block_size_rows", 0) \ + +#define LIST_OF_ROCKSDB_SETTINGS(M, ALIAS) ROCKSDB_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(RockDBSettingsTraits, LIST_OF_ROCKSDB_SETTINGS) + +struct RocksDBSettings : public BaseSettings, public IHints<2> +{ + void loadFromQuery(ASTStorage & storage_def, ContextPtr context); + std::vector getAllRegisteredNames() const override; +}; + +} diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 7c4581025e5..c3b7ae64c7e 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -28,8 +27,15 @@ #include #include #include +#include +#include +#include +#include #include +#include +#include +#include #include #include #include @@ -39,8 +45,6 @@ #include -namespace fs = std::filesystem; - namespace DB { @@ -174,6 +178,7 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, const StorageInMemoryMetadata & metadata_, LoadingStrictnessLevel mode, ContextPtr context_, + std::unique_ptr settings_, const String & primary_key_, Int32 ttl_, String rocksdb_dir_, @@ -184,8 +189,10 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, , rocksdb_dir(std::move(rocksdb_dir_)) , ttl(ttl_) , read_only(read_only_) + , log(getLogger(fmt::format("StorageEmbeddedRocksDB ({})", getStorageID().getNameForLogs()))) { setInMemoryMetadata(metadata_); + setSettings(std::move(settings_)); if (rocksdb_dir.empty()) { rocksdb_dir = context_->getPath() + relative_data_path_; @@ -205,7 +212,7 @@ void StorageEmbeddedRocksDB::truncate(const ASTPtr &, const StorageMetadataPtr & rocksdb_ptr->Close(); rocksdb_ptr = nullptr; - fs::remove_all(rocksdb_dir); + (void)fs::remove_all(rocksdb_dir); fs::create_directories(rocksdb_dir); initDB(); } @@ -236,22 +243,20 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt if (commands.front().type == MutationCommand::Type::DELETE) { - MutationsInterpreter::Settings settings(true); - settings.return_all_columns = true; - settings.return_mutated_rows = true; + MutationsInterpreter::Settings mutation_settings(true); + mutation_settings.return_all_columns = true; + mutation_settings.return_mutated_rows = true; auto interpreter = std::make_unique( storage_ptr, metadata_snapshot, commands, context_, - settings); + mutation_settings); auto pipeline = QueryPipelineBuilder::getPipeline(interpreter->execute()); PullingPipelineExecutor executor(pipeline); - auto sink = std::make_shared(*this, metadata_snapshot); - auto header = 
interpreter->getUpdatedHeader(); auto primary_key_pos = header.getPositionByName(primary_key); @@ -287,16 +292,16 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt if (commands.front().column_to_update_expression.contains(primary_key)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key cannot be updated (cannot update column {})", primary_key); - MutationsInterpreter::Settings settings(true); - settings.return_all_columns = true; - settings.return_mutated_rows = true; + MutationsInterpreter::Settings mutation_settings(true); + mutation_settings.return_all_columns = true; + mutation_settings.return_mutated_rows = true; auto interpreter = std::make_unique<MutationsInterpreter>( storage_ptr, metadata_snapshot, commands, context_, - settings); + mutation_settings); auto pipeline = QueryPipelineBuilder::getPipeline(interpreter->execute()); PullingPipelineExecutor executor(pipeline); @@ -312,6 +317,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt void StorageEmbeddedRocksDB::drop() { + std::lock_guard lock(rocksdb_ptr_mx); rocksdb_ptr->Close(); rocksdb_ptr = nullptr; } @@ -352,7 +358,6 @@ void StorageEmbeddedRocksDB::initDB() rocksdb::Options base; base.create_if_missing = true; - base.compression = rocksdb::CompressionType::kZSTD; base.statistics = rocksdb::CreateDBStatistics(); /// It is too verbose by default, and in fact we don't care about rocksdb logs at all. base.info_log_level = rocksdb::ERROR_LEVEL; @@ -460,18 +465,13 @@ void StorageEmbeddedRocksDB::initDB() { rocksdb::DB * db; if (read_only) - { status = rocksdb::DB::OpenForReadOnly(merged, rocksdb_dir, &db); - } else - { status = rocksdb::DB::Open(merged, rocksdb_dir, &db); - } + if (!status.ok()) - { - throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", - rocksdb_dir, status.ToString()); - } + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", rocksdb_dir, status.ToString()); + rocksdb_ptr = std::unique_ptr<rocksdb::DB>(db); } } @@ -575,15 +575,23 @@ void ReadFromEmbeddedRocksDB::initializePipeline(QueryPipelineBuilder & pipeline void ReadFromEmbeddedRocksDB::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const auto & sample_block = getOutputStream().header; auto primary_key_data_type = sample_block.getByName(storage.primary_key).type; std::tie(keys, all_scan) = getFilterKeys(storage.primary_key, primary_key_data_type, filter_actions_dag, context); } SinkToStoragePtr StorageEmbeddedRocksDB::write( - const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/, bool /*async_insert*/) + const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context, bool /*async_insert*/) { + if (getSettings().optimize_for_bulk_insert) + { + LOG_DEBUG(log, "Using bulk insert"); + return std::make_shared<EmbeddedRocksDBBulkSink>(query_context, *this, metadata_snapshot); + } + + LOG_DEBUG(log, "Using regular insert"); return std::make_shared<EmbeddedRocksDBSink>(*this, metadata_snapshot); } @@ -622,7 +630,21 @@ static StoragePtr create(const StorageFactory::Arguments & args) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); } - return std::make_shared<StorageEmbeddedRocksDB>(args.table_id, args.relative_data_path, metadata, args.mode, args.getContext(), primary_key_names[0], ttl, std::move(rocksdb_dir), read_only); + auto settings =
std::make_unique<RocksDBSettings>(); + settings->loadFromQuery(*args.storage_def, args.getContext()); + if (args.storage_def->settings) + metadata.settings_changes = args.storage_def->settings->ptr(); + else + { + /// A workaround because embedded rocksdb doesn't have default immutable settings. + /// But InterpreterAlterQuery requires settings_changes to be set to run ALTER MODIFY + /// SETTING queries, so we just add a setting with its default value. + auto settings_changes = std::make_shared<ASTSetQuery>(); + settings_changes->is_standalone = false; + settings_changes->changes.insertSetting("optimize_for_bulk_insert", settings->optimize_for_bulk_insert.value); + metadata.settings_changes = settings_changes; + } + return std::make_shared<StorageEmbeddedRocksDB>(args.table_id, args.relative_data_path, metadata, args.mode, args.getContext(), std::move(settings), primary_key_names[0], ttl, std::move(rocksdb_dir), read_only); } std::shared_ptr<rocksdb::Statistics> StorageEmbeddedRocksDB::getRocksDBStatistics() const @@ -713,9 +735,9 @@ Chunk StorageEmbeddedRocksDB::getBySerializedKeys( return Chunk(std::move(columns), num_rows); } -std::optional<UInt64> StorageEmbeddedRocksDB::totalRows(const Settings & settings) const +std::optional<UInt64> StorageEmbeddedRocksDB::totalRows(const Settings & query_settings) const { - if (!settings.optimize_trivial_approximate_count_query) + if (!query_settings.optimize_trivial_approximate_count_query) return {}; std::shared_lock lock(rocksdb_ptr_mx); if (!rocksdb_ptr) @@ -737,9 +759,26 @@ std::optional<UInt64> StorageEmbeddedRocksDB::totalBytes(const Settings & /*sett return estimated_bytes; } +void StorageEmbeddedRocksDB::alter( + const AlterCommands & params, + ContextPtr query_context, + AlterLockHolder & holder) +{ + IStorage::alter(params, query_context, holder); + auto new_metadata = getInMemoryMetadataPtr(); + if (new_metadata->settings_changes) + { + const auto & settings_changes = new_metadata->settings_changes->as<const ASTSetQuery &>(); + auto new_settings = std::make_unique<RocksDBSettings>(); + new_settings->applyChanges(settings_changes.changes); + setSettings(std::move(new_settings)); + } +} + void registerStorageEmbeddedRocksDB(StorageFactory & factory) { StorageFactory::StorageFeatures features{ + .supports_settings = true, .supports_sort_order = true, .supports_ttl = true, .supports_parallel_insert = true, @@ -747,4 +786,12 @@ void registerStorageEmbeddedRocksDB(StorageFactory & factory) factory.registerStorage("EmbeddedRocksDB", create, features); } + +void StorageEmbeddedRocksDB::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /* context */) const +{ + for (const auto & command : commands) + if (!command.isCommentAlter() && !command.isSettingsAlter()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); +} + } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 230464a161f..61592398954 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -1,11 +1,14 @@ #pragma once #include +#include #include -#include #include #include +#include #include +#include +#include namespace rocksdb @@ -27,6 +30,7 @@ class Context; class StorageEmbeddedRocksDB final : public IStorage, public IKeyValueEntity, WithContext { friend class EmbeddedRocksDBSink; + friend class EmbeddedRocksDBBulkSink; friend class ReadFromEmbeddedRocksDB; public: StorageEmbeddedRocksDB(const StorageID & table_id_, @@ -34,6 +38,7 @@ public: const StorageInMemoryMetadata & metadata, LoadingStrictnessLevel mode,
ContextPtr context_, + std::unique_ptr<RocksDBSettings> settings_, const String & primary_key_, Int32 ttl_ = 0, String rocksdb_dir_ = "", @@ -59,6 +64,7 @@ public: void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override; void mutate(const MutationCommands &, ContextPtr) override; void drop() override; + void alter(const AlterCommands & params, ContextPtr query_context, AlterLockHolder &) override; bool optimize( const ASTPtr & query, @@ -99,7 +105,16 @@ public: std::optional<UInt64> totalBytes(const Settings & settings) const override; + void checkAlterIsPossible(const AlterCommands & commands, ContextPtr /* context */) const override; + + const RocksDBSettings & getSettings() const { return *storage_settings.get(); } + + void setSettings(std::unique_ptr<RocksDBSettings> && settings_) { storage_settings.set(std::move(settings_)); } + private: + SinkToStoragePtr getSink(ContextPtr context, const StorageMetadataPtr & metadata_snapshot); + + MultiVersion<RocksDBSettings> storage_settings; const String primary_key; using RocksDBPtr = std::unique_ptr<rocksdb::DB>; RocksDBPtr rocksdb_ptr; @@ -109,5 +124,7 @@ private: bool read_only; void initDB(); + + LoggerPtr log; }; } diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index 4406a7c3fd4..5105b190fd9 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp deleted file mode 100644 index ed2f8d2ec1b..00000000000 --- a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp +++ /dev/null @@ -1,1117 +0,0 @@ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -namespace ProfileEvents -{ - extern const Event S3QueueSetFileProcessingMicroseconds; - extern const Event S3QueueSetFileProcessedMicroseconds; - extern const Event S3QueueSetFileFailedMicroseconds; - extern const Event S3QueueCleanupMaxSetSizeOrTTLMicroseconds; - extern const Event S3QueueLockLocalFileStatusesMicroseconds; - extern const Event CannotRemoveEphemeralNode; -}; - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - UInt64 getCurrentTime() - { - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); - } - - size_t generateRescheduleInterval(size_t min, size_t max) - { - /// Use more or less random interval for unordered mode cleanup task. - /// So that distributed processing cleanup tasks would not schedule cleanup at the same time.
- pcg64 rng(randomSeed()); - return min + rng() % (max - min + 1); - } -} - -std::unique_lock S3QueueFilesMetadata::LocalFileStatuses::lock() const -{ - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueLockLocalFileStatusesMicroseconds); - return std::unique_lock(mutex); -} - -S3QueueFilesMetadata::FileStatuses S3QueueFilesMetadata::LocalFileStatuses::getAll() const -{ - auto lk = lock(); - return file_statuses; -} - -S3QueueFilesMetadata::FileStatusPtr S3QueueFilesMetadata::LocalFileStatuses::get(const std::string & filename, bool create) -{ - auto lk = lock(); - auto it = file_statuses.find(filename); - if (it == file_statuses.end()) - { - if (create) - it = file_statuses.emplace(filename, std::make_shared()).first; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "File status for {} doesn't exist", filename); - } - return it->second; -} - -bool S3QueueFilesMetadata::LocalFileStatuses::remove(const std::string & filename, bool if_exists) -{ - auto lk = lock(); - auto it = file_statuses.find(filename); - if (it == file_statuses.end()) - { - if (if_exists) - return false; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "File status for {} doesn't exist", filename); - } - file_statuses.erase(it); - return true; -} - -std::string S3QueueFilesMetadata::NodeMetadata::toString() const -{ - Poco::JSON::Object json; - json.set("file_path", file_path); - json.set("last_processed_timestamp", getCurrentTime()); - json.set("last_exception", last_exception); - json.set("retries", retries); - json.set("processing_id", processing_id); - - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - Poco::JSON::Stringifier::stringify(json, oss); - return oss.str(); -} - -S3QueueFilesMetadata::NodeMetadata S3QueueFilesMetadata::NodeMetadata::fromString(const std::string & metadata_str) -{ - Poco::JSON::Parser parser; - auto json = parser.parse(metadata_str).extract(); - - NodeMetadata metadata; - metadata.file_path = json->getValue("file_path"); - metadata.last_processed_timestamp = json->getValue("last_processed_timestamp"); - metadata.last_exception = json->getValue("last_exception"); - metadata.retries = json->getValue("retries"); - metadata.processing_id = json->getValue("processing_id"); - return metadata; -} - -S3QueueFilesMetadata::S3QueueFilesMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_) - : mode(settings_.mode) - , max_set_size(settings_.s3queue_tracked_files_limit.value) - , max_set_age_sec(settings_.s3queue_tracked_file_ttl_sec.value) - , max_loading_retries(settings_.s3queue_loading_retries.value) - , min_cleanup_interval_ms(settings_.s3queue_cleanup_interval_min_ms.value) - , max_cleanup_interval_ms(settings_.s3queue_cleanup_interval_max_ms.value) - , shards_num(settings_.s3queue_total_shards_num) - , threads_per_shard(settings_.s3queue_processing_threads_num) - , zookeeper_processing_path(zookeeper_path_ / "processing") - , zookeeper_processed_path(zookeeper_path_ / "processed") - , zookeeper_failed_path(zookeeper_path_ / "failed") - , zookeeper_shards_path(zookeeper_path_ / "shards") - , zookeeper_cleanup_lock_path(zookeeper_path_ / "cleanup_lock") - , log(getLogger("S3QueueFilesMetadata")) -{ - if (mode == S3QueueMode::UNORDERED && (max_set_size || max_set_age_sec)) - { - task = Context::getGlobalContextInstance()->getSchedulePool().createTask("S3QueueCleanupFunc", [this] { cleanupThreadFunc(); }); - task->activate(); - 
task->scheduleAfter(generateRescheduleInterval(min_cleanup_interval_ms, max_cleanup_interval_ms)); - } -} - -S3QueueFilesMetadata::~S3QueueFilesMetadata() -{ - deactivateCleanupTask(); -} - -void S3QueueFilesMetadata::deactivateCleanupTask() -{ - shutdown = true; - if (task) - task->deactivate(); -} - -zkutil::ZooKeeperPtr S3QueueFilesMetadata::getZooKeeper() const -{ - return Context::getGlobalContextInstance()->getZooKeeper(); -} - -S3QueueFilesMetadata::FileStatusPtr S3QueueFilesMetadata::getFileStatus(const std::string & path) -{ - /// Return a locally cached file status. - return local_file_statuses.get(path, /* create */false); -} - -std::string S3QueueFilesMetadata::getNodeName(const std::string & path) -{ - /// Since with are dealing with paths in s3 which can have "/", - /// we cannot create a zookeeper node with the name equal to path. - /// Therefore we use a hash of the path as a node name. - - SipHash path_hash; - path_hash.update(path); - return toString(path_hash.get64()); -} - -S3QueueFilesMetadata::NodeMetadata S3QueueFilesMetadata::createNodeMetadata( - const std::string & path, - const std::string & exception, - size_t retries) -{ - /// Create a metadata which will be stored in a node named as getNodeName(path). - - /// Since node name is just a hash we want to know to which file it corresponds, - /// so we keep "file_path" in nodes data. - /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by s3queue_tracked_file_ttl_sec. - /// "last_exception" is kept for introspection, should also be visible in system.s3queue_log if it is enabled. - /// "retries" is kept for retrying the processing enabled by s3queue_loading_retries. - NodeMetadata metadata; - metadata.file_path = path; - metadata.last_processed_timestamp = getCurrentTime(); - metadata.last_exception = exception; - metadata.retries = retries; - return metadata; -} - -bool S3QueueFilesMetadata::isShardedProcessing() const -{ - return getProcessingIdsNum() > 1 && mode == S3QueueMode::ORDERED; -} - -size_t S3QueueFilesMetadata::registerNewShard() -{ - if (!isShardedProcessing()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot register a new shard, because processing is not sharded"); - } - - const auto zk_client = getZooKeeper(); - zk_client->createIfNotExists(zookeeper_shards_path, ""); - - std::string shard_node_path; - size_t shard_id = 0; - for (size_t i = 0; i < shards_num; ++i) - { - const auto node_path = getZooKeeperPathForShard(i); - auto err = zk_client->tryCreate(node_path, "", zkutil::CreateMode::Persistent); - if (err == Coordination::Error::ZOK) - { - shard_node_path = node_path; - shard_id = i; - break; - } - else if (err == Coordination::Error::ZNODEEXISTS) - continue; - else - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected error: {}", magic_enum::enum_name(err)); - } - - if (shard_node_path.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to register a new shard"); - - LOG_TRACE(log, "Using shard {} (zk node: {})", shard_id, shard_node_path); - return shard_id; -} - -std::string S3QueueFilesMetadata::getZooKeeperPathForShard(size_t shard_id) const -{ - return zookeeper_shards_path / ("shard" + toString(shard_id)); -} - -void S3QueueFilesMetadata::registerNewShard(size_t shard_id) -{ - if (!isShardedProcessing()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot register a new shard, because processing is not sharded"); - } - - const auto zk_client = getZooKeeper(); - const auto node_path = getZooKeeperPathForShard(shard_id); - 
zk_client->createAncestors(node_path); - - auto err = zk_client->tryCreate(node_path, "", zkutil::CreateMode::Persistent); - if (err != Coordination::Error::ZOK) - { - if (err == Coordination::Error::ZNODEEXISTS) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot register shard {}: already exists", shard_id); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected error: {}", magic_enum::enum_name(err)); - } -} - -bool S3QueueFilesMetadata::isShardRegistered(size_t shard_id) -{ - const auto zk_client = getZooKeeper(); - const auto node_path = getZooKeeperPathForShard(shard_id); - return zk_client->exists(node_path); -} - -void S3QueueFilesMetadata::unregisterShard(size_t shard_id) -{ - if (!isShardedProcessing()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot unregister a shard, because processing is not sharded"); - } - - const auto zk_client = getZooKeeper(); - const auto node_path = getZooKeeperPathForShard(shard_id); - auto error_code = zk_client->tryRemove(node_path); - if (error_code != Coordination::Error::ZOK - && error_code != Coordination::Error::ZNONODE) - throw zkutil::KeeperException::fromPath(error_code, node_path); -} - -size_t S3QueueFilesMetadata::getProcessingIdsNum() const -{ - return shards_num * threads_per_shard; -} - -std::vector S3QueueFilesMetadata::getProcessingIdsForShard(size_t shard_id) const -{ - std::vector res(threads_per_shard); - std::iota(res.begin(), res.end(), shard_id * threads_per_shard); - return res; -} - -bool S3QueueFilesMetadata::isProcessingIdBelongsToShard(size_t id, size_t shard_id) const -{ - return shard_id * threads_per_shard <= id && id < (shard_id + 1) * threads_per_shard; -} - -size_t S3QueueFilesMetadata::getIdForProcessingThread(size_t thread_id, size_t shard_id) const -{ - return shard_id * threads_per_shard + thread_id; -} - -size_t S3QueueFilesMetadata::getProcessingIdForPath(const std::string & path) const -{ - return sipHash64(path) % getProcessingIdsNum(); -} - -S3QueueFilesMetadata::ProcessingNodeHolderPtr S3QueueFilesMetadata::trySetFileAsProcessing(const std::string & path) -{ - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueSetFileProcessingMicroseconds); - auto file_status = local_file_statuses.get(path, /* create */true); - - /// Check locally cached file status. - /// Processed or Failed state is always cached. - /// Processing state is cached only if processing is being done by current clickhouse server - /// (because If another server is doing the processing, - /// we cannot know if state changes without checking with zookeeper so there is no point in cache here). - - { - std::lock_guard lock(file_status->metadata_lock); - switch (file_status->state) - { - case FileStatus::State::Processing: - { - LOG_TEST(log, "File {} is already processing", path); - return {}; - } - case FileStatus::State::Processed: - { - LOG_TEST(log, "File {} is already processed", path); - return {}; - } - case FileStatus::State::Failed: - { - /// If max_loading_retries == 0, file is not retriable. - if (max_loading_retries == 0) - { - LOG_TEST(log, "File {} is failed and processing retries are disabled", path); - return {}; - } - - /// Otherwise file_status->retries is also cached. - /// In case file_status->retries >= max_loading_retries we can fully rely that it is true - /// and will not attempt processing it. 
- /// But in case file_status->retries < max_loading_retries we cannot be sure - /// (another server could have done a try after we cached retries value), - /// so check with zookeeper here. - if (file_status->retries >= max_loading_retries) - { - LOG_TEST(log, "File {} is failed and processing retries are exceeeded", path); - return {}; - } - - break; - } - case FileStatus::State::None: - { - /// The file was not processed by current server and file status was not cached, - /// check metadata in zookeeper. - break; - } - } - } - - /// Another thread could already be trying to set file as processing. - /// So there is no need to attempt the same, better to continue with the next file. - std::unique_lock processing_lock(file_status->processing_lock, std::defer_lock); - if (!processing_lock.try_lock()) - { - return {}; - } - - /// Let's go and check metadata in zookeeper and try to create a /processing ephemeral node. - /// If successful, return result with processing node holder. - SetFileProcessingResult result; - ProcessingNodeHolderPtr processing_node_holder; - - switch (mode) - { - case S3QueueMode::ORDERED: - { - std::tie(result, processing_node_holder) = trySetFileAsProcessingForOrderedMode(path, file_status); - break; - } - case S3QueueMode::UNORDERED: - { - std::tie(result, processing_node_holder) = trySetFileAsProcessingForUnorderedMode(path, file_status); - break; - } - } - - /// Cache file status, save some statistics. - switch (result) - { - case SetFileProcessingResult::Success: - { - std::lock_guard lock(file_status->metadata_lock); - file_status->state = FileStatus::State::Processing; - - file_status->profile_counters.increment(ProfileEvents::S3QueueSetFileProcessingMicroseconds, timer.get()); - timer.cancel(); - - if (!file_status->processing_start_time) - file_status->processing_start_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - - return processing_node_holder; - } - case SetFileProcessingResult::AlreadyProcessed: - { - std::lock_guard lock(file_status->metadata_lock); - file_status->state = FileStatus::State::Processed; - return {}; - } - case SetFileProcessingResult::AlreadyFailed: - { - std::lock_guard lock(file_status->metadata_lock); - file_status->state = FileStatus::State::Failed; - return {}; - } - case SetFileProcessingResult::ProcessingByOtherNode: - { - /// We cannot save any local state here, see comment above. - return {}; - } - } -} - -std::pair -S3QueueFilesMetadata::trySetFileAsProcessingForUnorderedMode(const std::string & path, const FileStatusPtr & file_status) -{ - /// In one zookeeper transaction do the following: - /// 1. check that corresponding persistent nodes do not exist in processed/ and failed/; - /// 2. create an ephemenral node in /processing if it does not exist; - /// Return corresponding status if any of the step failed. 
- - const auto node_name = getNodeName(path); - const auto zk_client = getZooKeeper(); - auto node_metadata = createNodeMetadata(path); - node_metadata.processing_id = getRandomASCIIString(10); - - Coordination::Requests requests; - - requests.push_back(zkutil::makeCreateRequest(zookeeper_processed_path / node_name, "", zkutil::CreateMode::Persistent)); - requests.push_back(zkutil::makeRemoveRequest(zookeeper_processed_path / node_name, -1)); - - requests.push_back(zkutil::makeCreateRequest(zookeeper_failed_path / node_name, "", zkutil::CreateMode::Persistent)); - requests.push_back(zkutil::makeRemoveRequest(zookeeper_failed_path / node_name, -1)); - - requests.push_back(zkutil::makeCreateRequest(zookeeper_processing_path / node_name, node_metadata.toString(), zkutil::CreateMode::Ephemeral)); - - Coordination::Responses responses; - auto code = zk_client->tryMulti(requests, responses); - - if (code == Coordination::Error::ZOK) - { - auto holder = std::make_unique( - node_metadata.processing_id, path, zookeeper_processing_path / node_name, file_status, zk_client); - return std::pair{SetFileProcessingResult::Success, std::move(holder)}; - } - - if (responses[0]->error != Coordination::Error::ZOK) - { - return std::pair{SetFileProcessingResult::AlreadyProcessed, nullptr}; - } - else if (responses[2]->error != Coordination::Error::ZOK) - { - return std::pair{SetFileProcessingResult::AlreadyFailed, nullptr}; - } - else if (responses[4]->error != Coordination::Error::ZOK) - { - return std::pair{SetFileProcessingResult::ProcessingByOtherNode, nullptr}; - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state of zookeeper transaction: {}", magic_enum::enum_name(code)); - } -} - -std::pair -S3QueueFilesMetadata::trySetFileAsProcessingForOrderedMode(const std::string & path, const FileStatusPtr & file_status) -{ - /// Same as for Unordered mode. - /// The only difference is the check if the file is already processed. - /// For Ordered mode we do not keep a separate /processed/hash_node for each file - /// but instead we only keep a maximum processed file - /// (since all files are ordered and new files have a lexically bigger name, it makes sense). - - const auto node_name = getNodeName(path); - const auto zk_client = getZooKeeper(); - auto node_metadata = createNodeMetadata(path); - node_metadata.processing_id = getRandomASCIIString(10); - - while (true) - { - /// Get a /processed node content - max_processed path. - /// Compare our path to it. - /// If file is not yet processed, check corresponding /failed node and try create /processing node - /// and in the same zookeeper transaction also check that /processed node did not change - /// in between, e.g. that stat.version remained the same. - /// If the version did change - retry (since we cannot do Get and Create requests - /// in the same zookeeper transaction, so we use a while loop with tries). - - auto processed_node = isShardedProcessing() - ? 
zookeeper_processed_path / toString(getProcessingIdForPath(path)) - : zookeeper_processed_path; - - NodeMetadata processed_node_metadata; - Coordination::Stat processed_node_stat; - std::string data; - auto processed_node_exists = zk_client->tryGet(processed_node, data, &processed_node_stat); - if (processed_node_exists && !data.empty()) - processed_node_metadata = NodeMetadata::fromString(data); - - auto max_processed_file_path = processed_node_metadata.file_path; - if (!max_processed_file_path.empty() && path <= max_processed_file_path) - { - LOG_TEST(log, "File {} is already processed, max processed file: {}", path, max_processed_file_path); - return std::pair{SetFileProcessingResult::AlreadyProcessed, nullptr}; - } - - Coordination::Requests requests; - requests.push_back(zkutil::makeCreateRequest(zookeeper_failed_path / node_name, "", zkutil::CreateMode::Persistent)); - requests.push_back(zkutil::makeRemoveRequest(zookeeper_failed_path / node_name, -1)); - - requests.push_back(zkutil::makeCreateRequest(zookeeper_processing_path / node_name, node_metadata.toString(), zkutil::CreateMode::Ephemeral)); - - if (processed_node_exists) - { - requests.push_back(zkutil::makeCheckRequest(processed_node, processed_node_stat.version)); - } - else - { - requests.push_back(zkutil::makeCreateRequest(processed_node, "", zkutil::CreateMode::Persistent)); - requests.push_back(zkutil::makeRemoveRequest(processed_node, -1)); - } - - Coordination::Responses responses; - auto code = zk_client->tryMulti(requests, responses); - if (code == Coordination::Error::ZOK) - { - auto holder = std::make_unique( - node_metadata.processing_id, path, zookeeper_processing_path / node_name, file_status, zk_client); - - LOG_TEST(log, "File {} is ready to be processed", path); - return std::pair{SetFileProcessingResult::Success, std::move(holder)}; - } - - if (responses[0]->error != Coordination::Error::ZOK) - { - LOG_TEST(log, "Skipping file `{}`: failed", path); - return std::pair{SetFileProcessingResult::AlreadyFailed, nullptr}; - } - else if (responses[2]->error != Coordination::Error::ZOK) - { - LOG_TEST(log, "Skipping file `{}`: already processing", path); - return std::pair{SetFileProcessingResult::ProcessingByOtherNode, nullptr}; - } - else - { - LOG_TEST(log, "Version of max processed file changed. Retrying the check for file `{}`", path); - } - } -} - -void S3QueueFilesMetadata::setFileProcessed(ProcessingNodeHolderPtr holder) -{ - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueSetFileProcessedMicroseconds); - auto file_status = holder->getFileStatus(); - { - std::lock_guard lock(file_status->metadata_lock); - file_status->state = FileStatus::State::Processed; - file_status->processing_end_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - } - - SCOPE_EXIT({ - file_status->profile_counters.increment(ProfileEvents::S3QueueSetFileProcessedMicroseconds, timer.get()); - timer.cancel(); - }); - - switch (mode) - { - case S3QueueMode::ORDERED: - { - return setFileProcessedForOrderedMode(holder); - } - case S3QueueMode::UNORDERED: - { - return setFileProcessedForUnorderedMode(holder); - } - } -} - -void S3QueueFilesMetadata::setFileProcessedForUnorderedMode(ProcessingNodeHolderPtr holder) -{ - /// Create a persistent node in /processed and remove ephemeral node from /processing. 
- - const auto & path = holder->path; - const auto node_name = getNodeName(path); - const auto node_metadata = createNodeMetadata(path).toString(); - const auto zk_client = getZooKeeper(); - - Coordination::Requests requests; - requests.push_back(zkutil::makeCreateRequest(zookeeper_processed_path / node_name, node_metadata, zkutil::CreateMode::Persistent)); - - Coordination::Responses responses; - if (holder->remove(&requests, &responses)) - { - LOG_TEST(log, "Moved file `{}` to processed", path); - if (max_loading_retries) - zk_client->tryRemove(zookeeper_failed_path / (node_name + ".retriable"), -1); - return; - } - - if (!responses.empty() && responses[0]->error != Coordination::Error::ZOK) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot create a persistent node in /processed since it already exists"); - } - - LOG_WARNING(log, - "Cannot set file ({}) as processed since ephemeral node in /processing" - "does not exist with expected id, " - "this could be a result of expired zookeeper session", path); -} - - -void S3QueueFilesMetadata::setFileProcessedForOrderedMode(ProcessingNodeHolderPtr holder) -{ - auto processed_node_path = isShardedProcessing() - ? zookeeper_processed_path / toString(getProcessingIdForPath(holder->path)) - : zookeeper_processed_path; - - return setFileProcessedForOrderedModeImpl(holder->path, holder, processed_node_path); -} - -void S3QueueFilesMetadata::setFileProcessedForOrderedModeImpl( - const std::string & path, ProcessingNodeHolderPtr holder, const std::string & processed_node_path) -{ - /// Update a persistent node in /processed and remove ephemeral node from /processing. - - const auto node_name = getNodeName(path); - const auto node_metadata = createNodeMetadata(path).toString(); - const auto zk_client = getZooKeeper(); - - LOG_TEST(log, "Setting file `{}` as processed (at {})", path, processed_node_path); - while (true) - { - std::string res; - Coordination::Stat stat; - bool exists = zk_client->tryGet(processed_node_path, res, &stat); - Coordination::Requests requests; - if (exists) - { - if (!res.empty()) - { - auto metadata = NodeMetadata::fromString(res); - if (metadata.file_path >= path) - { - LOG_TRACE(log, "File {} is already processed, current max processed file: {}", path, metadata.file_path); - return; - } - } - requests.push_back(zkutil::makeSetRequest(processed_node_path, node_metadata, stat.version)); - } - else - { - requests.push_back(zkutil::makeCreateRequest(processed_node_path, node_metadata, zkutil::CreateMode::Persistent)); - } - - Coordination::Responses responses; - if (holder) - { - if (holder->remove(&requests, &responses)) - { - LOG_TEST(log, "Moved file `{}` to processed", path); - if (max_loading_retries) - zk_client->tryRemove(zookeeper_failed_path / (node_name + ".retriable"), -1); - return; - } - } - else - { - auto code = zk_client->tryMulti(requests, responses); - if (code == Coordination::Error::ZOK) - { - LOG_TEST(log, "Moved file `{}` to processed", path); - return; - } - } - - /// Failed to update max processed node, retry. - if (!responses.empty() && responses[0]->error != Coordination::Error::ZOK) - { - LOG_TRACE(log, "Failed to update processed node ({}). 
Will retry.", magic_enum::enum_name(responses[0]->error)); - continue; - } - - LOG_WARNING(log, "Cannot set file ({}) as processed since processing node " - "does not exist with expected processing id does not exist, " - "this could be a result of expired zookeeper session", path); - return; - } -} - -void S3QueueFilesMetadata::setFileProcessed(const std::string & path, size_t shard_id) -{ - if (mode != S3QueueMode::ORDERED) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can set file as preprocessed only for Ordered mode"); - - if (isShardedProcessing()) - { - for (const auto & processor : getProcessingIdsForShard(shard_id)) - setFileProcessedForOrderedModeImpl(path, nullptr, zookeeper_processed_path / toString(processor)); - } - else - { - setFileProcessedForOrderedModeImpl(path, nullptr, zookeeper_processed_path); - } -} - -void S3QueueFilesMetadata::setFileFailed(ProcessingNodeHolderPtr holder, const String & exception_message) -{ - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueSetFileFailedMicroseconds); - const auto & path = holder->path; - - auto file_status = holder->getFileStatus(); - { - std::lock_guard lock(file_status->metadata_lock); - file_status->state = FileStatus::State::Failed; - file_status->last_exception = exception_message; - file_status->processing_end_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - } - - SCOPE_EXIT({ - file_status->profile_counters.increment(ProfileEvents::S3QueueSetFileFailedMicroseconds, timer.get()); - timer.cancel(); - }); - - const auto node_name = getNodeName(path); - auto node_metadata = createNodeMetadata(path, exception_message); - const auto zk_client = getZooKeeper(); - - /// Is file retriable? - if (max_loading_retries == 0) - { - /// File is not retriable, - /// just create a node in /failed and remove a node from /processing. - - Coordination::Requests requests; - requests.push_back(zkutil::makeCreateRequest(zookeeper_failed_path / node_name, - node_metadata.toString(), - zkutil::CreateMode::Persistent)); - Coordination::Responses responses; - if (holder->remove(&requests, &responses)) - { - LOG_TEST(log, "File `{}` failed to process and will not be retried. " - "Error: {}", path, exception_message); - return; - } - - if (responses[0]->error != Coordination::Error::ZOK) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot create a persistent node in /failed since it already exists"); - } - - LOG_WARNING(log, "Cannot set file ({}) as processed since processing node " - "does not exist with expected processing id does not exist, " - "this could be a result of expired zookeeper session", path); - return; - } - - /// So file is retriable. - /// Let's do an optimization here. - /// Instead of creating a persistent /failed/node_hash node - /// we create a persistent /failed/node_hash.retriable node. - /// This allows us to make less zookeeper requests as we avoid checking - /// the number of already done retries in trySetFileAsProcessing. - - const auto node_name_with_retriable_suffix = node_name + ".retriable"; - Coordination::Stat stat; - std::string res; - - /// Extract the number of already done retries from node_hash.retriable node if it exists. 
- if (zk_client->tryGet(zookeeper_failed_path / node_name_with_retriable_suffix, res, &stat)) - { - auto failed_node_metadata = NodeMetadata::fromString(res); - node_metadata.retries = failed_node_metadata.retries + 1; - - std::lock_guard lock(file_status->metadata_lock); - file_status->retries = node_metadata.retries; - } - - LOG_TEST(log, "File `{}` failed to process, try {}/{} (Error: {})", - path, node_metadata.retries, max_loading_retries, exception_message); - - /// Check if file can be retried further or not. - if (node_metadata.retries >= max_loading_retries) - { - /// File is no longer retriable. - /// Make a persistent node /failed/node_hash, remove /failed/node_hash.retriable node and node in /processing. - - Coordination::Requests requests; - requests.push_back(zkutil::makeRemoveRequest(zookeeper_processing_path / node_name, -1)); - requests.push_back(zkutil::makeRemoveRequest(zookeeper_failed_path / node_name_with_retriable_suffix, - stat.version)); - requests.push_back(zkutil::makeCreateRequest(zookeeper_failed_path / node_name, - node_metadata.toString(), - zkutil::CreateMode::Persistent)); - - Coordination::Responses responses; - auto code = zk_client->tryMulti(requests, responses); - if (code == Coordination::Error::ZOK) - return; - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set file as failed"); - } - else - { - /// File is still retriable, update retries count and remove node from /processing. - - Coordination::Requests requests; - requests.push_back(zkutil::makeRemoveRequest(zookeeper_processing_path / node_name, -1)); - if (node_metadata.retries == 0) - { - requests.push_back(zkutil::makeCreateRequest(zookeeper_failed_path / node_name_with_retriable_suffix, - node_metadata.toString(), - zkutil::CreateMode::Persistent)); - } - else - { - requests.push_back(zkutil::makeSetRequest(zookeeper_failed_path / node_name_with_retriable_suffix, - node_metadata.toString(), - stat.version)); - } - Coordination::Responses responses; - auto code = zk_client->tryMulti(requests, responses); - if (code == Coordination::Error::ZOK) - return; - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set file as failed"); - } -} - -S3QueueFilesMetadata::ProcessingNodeHolder::ProcessingNodeHolder( - const std::string & processing_id_, - const std::string & path_, - const std::string & zk_node_path_, - FileStatusPtr file_status_, - zkutil::ZooKeeperPtr zk_client_) - : zk_client(zk_client_) - , file_status(file_status_) - , path(path_) - , zk_node_path(zk_node_path_) - , processing_id(processing_id_) - , log(getLogger("ProcessingNodeHolder")) -{ -} - -S3QueueFilesMetadata::ProcessingNodeHolder::~ProcessingNodeHolder() -{ - if (!removed) - remove(); -} - -bool S3QueueFilesMetadata::ProcessingNodeHolder::remove(Coordination::Requests * requests, Coordination::Responses * responses) -{ - if (removed) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Processing node is already removed"); - - LOG_TEST(log, "Removing processing node {} ({})", zk_node_path, path); - - try - { - if (!zk_client->expired()) - { - /// Is is possible that we created an ephemeral processing node - /// but session expired and someone other created an ephemeral processing node. - /// To avoid deleting this new node, check processing_id. 
- std::string res; - Coordination::Stat stat; - if (zk_client->tryGet(zk_node_path, res, &stat)) - { - auto node_metadata = NodeMetadata::fromString(res); - if (node_metadata.processing_id == processing_id) - { - if (requests) - { - requests->push_back(zkutil::makeRemoveRequest(zk_node_path, stat.version)); - auto code = zk_client->tryMulti(*requests, *responses); - removed = code == Coordination::Error::ZOK; - } - else - { - zk_client->remove(zk_node_path); - removed = true; - } - return removed; - } - else - LOG_WARNING(log, "Cannot remove {} since processing id changed: {} -> {}", - zk_node_path, processing_id, node_metadata.processing_id); - } - else - LOG_DEBUG(log, "Cannot remove {}, node doesn't exist, " - "probably because of session expiration", zk_node_path); - - /// TODO: this actually would mean that we already processed (or partially processed) - /// the data but another thread will try processing it again and data can be duplicated. - /// This can be solved via persistenly saving last processed offset in the file. - } - else - { - ProfileEvents::increment(ProfileEvents::CannotRemoveEphemeralNode); - LOG_DEBUG(log, "Cannot remove {} since session has been expired", zk_node_path); - } - } - catch (...) - { - ProfileEvents::increment(ProfileEvents::CannotRemoveEphemeralNode); - DB::tryLogCurrentException(__PRETTY_FUNCTION__, "Cannot remove " + zk_node_path); - } - return false; -} - -void S3QueueFilesMetadata::cleanupThreadFunc() -{ - /// A background task is responsible for maintaining - /// max_set_size and max_set_age settings for `unordered` processing mode. - - if (shutdown) - return; - - try - { - cleanupThreadFuncImpl(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - if (shutdown) - return; - - task->scheduleAfter(generateRescheduleInterval(min_cleanup_interval_ms, max_cleanup_interval_ms)); -} - -void S3QueueFilesMetadata::cleanupThreadFuncImpl() -{ - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueCleanupMaxSetSizeOrTTLMicroseconds); - - chassert(max_set_size || max_set_age_sec); - - const bool check_nodes_limit = max_set_size > 0; - const bool check_nodes_ttl = max_set_age_sec > 0; - - const auto zk_client = getZooKeeper(); - Strings nodes; - auto code = zk_client->tryGetChildren(zookeeper_processed_path, nodes); - if (code != Coordination::Error::ZOK) - { - if (code == Coordination::Error::ZNONODE) - { - LOG_TEST(log, "A `processed` not is not yet created"); - return; - } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", magic_enum::enum_name(code)); - } - - if (nodes.empty()) - { - LOG_TEST(log, "A set of nodes is empty"); - return; - } - - const bool nodes_limit_exceeded = nodes.size() > max_set_size; - if (!nodes_limit_exceeded && check_nodes_limit && !check_nodes_ttl) - { - LOG_TEST(log, "No limit exceeded"); - return; - } - - /// Create a lock so that with distributed processing - /// multiple nodes do not execute cleanup in parallel. - auto ephemeral_node = zkutil::EphemeralNodeHolder::tryCreate(zookeeper_cleanup_lock_path, *zk_client, toString(getCurrentTime())); - if (!ephemeral_node) - { - LOG_TEST(log, "Cleanup is already being executed by another node"); - return; - } - /// TODO because of this lock we might not update local file statuses on time on one of the nodes. 
- - struct Node - { - std::string name; - NodeMetadata metadata; - }; - auto node_cmp = [](const Node & a, const Node & b) - { - return std::tie(a.metadata.last_processed_timestamp, a.metadata.file_path) - < std::tie(b.metadata.last_processed_timestamp, b.metadata.file_path); - }; - - /// Ordered in ascending order of timestamps. - std::set sorted_nodes(node_cmp); - - LOG_TRACE(log, "Found {} nodes", nodes.size()); - - for (const auto & node : nodes) - { - try - { - std::string metadata_str; - if (zk_client->tryGet(zookeeper_processed_path / node, metadata_str)) - { - sorted_nodes.emplace(node, NodeMetadata::fromString(metadata_str)); - LOG_TEST(log, "Fetched metadata for node {}", node); - } - else - LOG_TEST(log, "Failed to fetch node metadata {}", node); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - auto get_nodes_str = [&]() - { - WriteBufferFromOwnString wb; - for (const auto & [node, metadata] : sorted_nodes) - wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp); - return wb.str(); - }; - LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", max_set_size, max_set_age_sec, get_nodes_str()); - - size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes.size() - max_set_size : 0; - for (const auto & node : sorted_nodes) - { - if (nodes_to_remove) - { - auto path = zookeeper_processed_path / node.name; - LOG_TEST(log, "Removing node at path {} ({}) because max files limit is reached", - node.metadata.file_path, path.string()); - - local_file_statuses.remove(node.metadata.file_path, /* if_exists */true); - - code = zk_client->tryRemove(path); - if (code == Coordination::Error::ZOK) - --nodes_to_remove; - else - LOG_ERROR(log, "Failed to remove a node `{}` (code: {})", path.string(), code); - } - else if (check_nodes_ttl) - { - UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp; - if (node_age >= max_set_age_sec) - { - auto path = zookeeper_processed_path / node.name; - LOG_TEST(log, "Removing node at path {} ({}) because file is reached", - node.metadata.file_path, path.string()); - - local_file_statuses.remove(node.metadata.file_path, /* if_exists */true); - - code = zk_client->tryRemove(path); - if (code != Coordination::Error::ZOK) - LOG_ERROR(log, "Failed to remove a node `{}` (code: {})", path.string(), code); - } - else if (!nodes_to_remove) - { - /// Nodes limit satisfied. - /// Nodes ttl satisfied as well as if current node is under tll, then all remaining as well - /// (because we are iterating in timestamp ascending order). - break; - } - } - else - { - /// Nodes limit and ttl are satisfied. 
- break; - } - } - - LOG_TRACE(log, "Node limits check finished"); -} - -bool S3QueueFilesMetadata::checkSettings(const S3QueueSettings & settings) const -{ - return mode == settings.mode - && max_set_size == settings.s3queue_tracked_files_limit.value - && max_set_age_sec == settings.s3queue_tracked_file_ttl_sec.value - && max_loading_retries == settings.s3queue_loading_retries.value - && min_cleanup_interval_ms == settings.s3queue_cleanup_interval_min_ms.value - && max_cleanup_interval_ms == settings.s3queue_cleanup_interval_max_ms.value; -} - -} diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.h b/src/Storages/S3Queue/S3QueueFilesMetadata.h deleted file mode 100644 index 9301ea7ceb8..00000000000 --- a/src/Storages/S3Queue/S3QueueFilesMetadata.h +++ /dev/null @@ -1,214 +0,0 @@ -#pragma once -#include "config.h" - -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; -namespace Poco { class Logger; } - -namespace DB -{ -struct S3QueueSettings; -class StorageS3Queue; - -/** - * A class for managing S3Queue metadata in zookeeper, e.g. - * the following folders: - * - /processing - * - /processed - * - /failed - * - * Depending on S3Queue processing mode (ordered or unordered) - * we can differently store metadata in /processed node. - * - * Implements caching of zookeeper metadata for faster responses. - * Cached part is located in LocalFileStatuses. - * - * In case of Unordered mode - if files TTL is enabled or maximum tracked files limit is set - * starts a background cleanup thread which is responsible for maintaining them. - */ -class S3QueueFilesMetadata -{ -public: - class ProcessingNodeHolder; - using ProcessingNodeHolderPtr = std::shared_ptr; - - S3QueueFilesMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_); - - ~S3QueueFilesMetadata(); - - void setFileProcessed(ProcessingNodeHolderPtr holder); - void setFileProcessed(const std::string & path, size_t shard_id); - - void setFileFailed(ProcessingNodeHolderPtr holder, const std::string & exception_message); - - struct FileStatus - { - enum class State - { - Processing, - Processed, - Failed, - None - }; - State state = State::None; - - std::atomic processed_rows = 0; - time_t processing_start_time = 0; - time_t processing_end_time = 0; - size_t retries = 0; - std::string last_exception; - ProfileEvents::Counters profile_counters; - - std::mutex processing_lock; - std::mutex metadata_lock; - }; - using FileStatusPtr = std::shared_ptr; - using FileStatuses = std::unordered_map; - - /// Set file as processing, if it is not alreaty processed, failed or processing. - ProcessingNodeHolderPtr trySetFileAsProcessing(const std::string & path); - - FileStatusPtr getFileStatus(const std::string & path); - - FileStatuses getFileStateses() const { return local_file_statuses.getAll(); } - - bool checkSettings(const S3QueueSettings & settings) const; - - void deactivateCleanupTask(); - - /// Should the table use sharded processing? - /// We use sharded processing for Ordered mode of S3Queue table. - /// It allows to parallelize processing within a single server - /// and to allow distributed processing. - bool isShardedProcessing() const; - - /// Register a new shard for processing. - /// Return a shard id of registered shard. - size_t registerNewShard(); - /// Register a new shard for processing by given id. - /// Throws exception if shard by this id is already registered. - void registerNewShard(size_t shard_id); - /// Unregister shard from keeper. 
- void unregisterShard(size_t shard_id); - bool isShardRegistered(size_t shard_id); - - /// Total number of processing ids. - /// A processing id identifies a single processing thread. - /// There might be several processing ids per shard. - size_t getProcessingIdsNum() const; - /// Get processing ids identified with requested shard. - std::vector getProcessingIdsForShard(size_t shard_id) const; - /// Check if given processing id belongs to a given shard. - bool isProcessingIdBelongsToShard(size_t id, size_t shard_id) const; - /// Get a processing id for processing thread by given thread id. - /// thread id is a value in range [0, threads_per_shard]. - size_t getIdForProcessingThread(size_t thread_id, size_t shard_id) const; - - /// Calculate which processing id corresponds to a given file path. - /// The file will be processed by a thread related to this processing id. - size_t getProcessingIdForPath(const std::string & path) const; - -private: - const S3QueueMode mode; - const UInt64 max_set_size; - const UInt64 max_set_age_sec; - const UInt64 max_loading_retries; - const size_t min_cleanup_interval_ms; - const size_t max_cleanup_interval_ms; - const size_t shards_num; - const size_t threads_per_shard; - - const fs::path zookeeper_processing_path; - const fs::path zookeeper_processed_path; - const fs::path zookeeper_failed_path; - const fs::path zookeeper_shards_path; - const fs::path zookeeper_cleanup_lock_path; - - LoggerPtr log; - - std::atomic_bool shutdown = false; - BackgroundSchedulePool::TaskHolder task; - - std::string getNodeName(const std::string & path); - - zkutil::ZooKeeperPtr getZooKeeper() const; - - void setFileProcessedForOrderedMode(ProcessingNodeHolderPtr holder); - void setFileProcessedForUnorderedMode(ProcessingNodeHolderPtr holder); - std::string getZooKeeperPathForShard(size_t shard_id) const; - - void setFileProcessedForOrderedModeImpl( - const std::string & path, ProcessingNodeHolderPtr holder, const std::string & processed_node_path); - - enum class SetFileProcessingResult - { - Success, - ProcessingByOtherNode, - AlreadyProcessed, - AlreadyFailed, - }; - std::pair trySetFileAsProcessingForOrderedMode(const std::string & path, const FileStatusPtr & file_status); - std::pair trySetFileAsProcessingForUnorderedMode(const std::string & path, const FileStatusPtr & file_status); - - struct NodeMetadata - { - std::string file_path; UInt64 last_processed_timestamp = 0; - std::string last_exception; - UInt64 retries = 0; - std::string processing_id; /// For ephemeral processing node. 
- - std::string toString() const; - static NodeMetadata fromString(const std::string & metadata_str); - }; - - NodeMetadata createNodeMetadata(const std::string & path, const std::string & exception = "", size_t retries = 0); - - void cleanupThreadFunc(); - void cleanupThreadFuncImpl(); - - struct LocalFileStatuses - { - FileStatuses file_statuses; - mutable std::mutex mutex; - - FileStatuses getAll() const; - FileStatusPtr get(const std::string & filename, bool create); - bool remove(const std::string & filename, bool if_exists); - std::unique_lock lock() const; - }; - LocalFileStatuses local_file_statuses; -}; - -class S3QueueFilesMetadata::ProcessingNodeHolder -{ - friend class S3QueueFilesMetadata; -public: - ProcessingNodeHolder( - const std::string & processing_id_, - const std::string & path_, - const std::string & zk_node_path_, - FileStatusPtr file_status_, - zkutil::ZooKeeperPtr zk_client_); - - ~ProcessingNodeHolder(); - - FileStatusPtr getFileStatus() { return file_status; } - -private: - bool remove(Coordination::Requests * requests = nullptr, Coordination::Responses * responses = nullptr); - - zkutil::ZooKeeperPtr zk_client; - FileStatusPtr file_status; - std::string path; - std::string zk_node_path; - std::string processing_id; - bool removed = false; - LoggerPtr log; -}; - -} diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp b/src/Storages/S3Queue/S3QueueIFileMetadata.cpp new file mode 100644 index 00000000000..6c4089115d4 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueIFileMetadata.cpp @@ -0,0 +1,354 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event S3QueueProcessedFiles; + extern const Event S3QueueFailedFiles; +}; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + zkutil::ZooKeeperPtr getZooKeeper() + { + return Context::getGlobalContextInstance()->getZooKeeper(); + } + + time_t now() + { + return std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + } +} + +void S3QueueIFileMetadata::FileStatus::onProcessing() +{ + state = FileStatus::State::Processing; + processing_start_time = now(); +} + +void S3QueueIFileMetadata::FileStatus::onProcessed() +{ + state = FileStatus::State::Processed; + processing_end_time = now(); +} + +void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception) +{ + state = FileStatus::State::Failed; + processing_end_time = now(); + std::lock_guard lock(last_exception_mutex); + last_exception = exception; +} + +std::string S3QueueIFileMetadata::FileStatus::getException() const +{ + std::lock_guard lock(last_exception_mutex); + return last_exception; +} + +std::string S3QueueIFileMetadata::NodeMetadata::toString() const +{ + Poco::JSON::Object json; + json.set("file_path", file_path); + json.set("last_processed_timestamp", now()); + json.set("last_exception", last_exception); + json.set("retries", retries); + json.set("processing_id", processing_id); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json, oss); + return oss.str(); +} + +S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str) +{ + Poco::JSON::Parser parser; + auto json = parser.parse(metadata_str).extract(); + chassert(json); + + NodeMetadata metadata; + metadata.file_path = json->getValue("file_path"); + metadata.last_processed_timestamp = 
json->getValue("last_processed_timestamp"); + metadata.last_exception = json->getValue("last_exception"); + metadata.retries = json->getValue("retries"); + metadata.processing_id = json->getValue("processing_id"); + return metadata; +} + +S3QueueIFileMetadata::S3QueueIFileMetadata( + const std::string & path_, + const std::string & processing_node_path_, + const std::string & processed_node_path_, + const std::string & failed_node_path_, + FileStatusPtr file_status_, + size_t max_loading_retries_, + LoggerPtr log_) + : path(path_) + , node_name(getNodeName(path_)) + , file_status(file_status_) + , max_loading_retries(max_loading_retries_) + , processing_node_path(processing_node_path_) + , processed_node_path(processed_node_path_) + , failed_node_path(failed_node_path_) + , node_metadata(createNodeMetadata(path)) + , log(log_) + , processing_node_id_path(processing_node_path + "_processing_id") +{ + LOG_TEST(log, "Path: {}, node_name: {}, max_loading_retries: {}, " + "processed_path: {}, processing_path: {}, failed_path: {}", + path, node_name, max_loading_retries, + processed_node_path, processing_node_path, failed_node_path); +} + +S3QueueIFileMetadata::~S3QueueIFileMetadata() +{ + if (processing_id_version.has_value()) + { + file_status->onFailed("Uncaught exception"); + LOG_TEST(log, "Removing processing node in destructor for file: {}", path); + try + { + auto zk_client = getZooKeeper(); + + Coordination::Requests requests; + requests.push_back(zkutil::makeCheckRequest(processing_node_id_path, processing_id_version.value())); + requests.push_back(zkutil::makeRemoveRequest(processing_node_path, -1)); + + Coordination::Responses responses; + const auto code = zk_client->tryMulti(requests, responses); + if (code != Coordination::Error::ZOK + && !Coordination::isHardwareError(code) + && code != Coordination::Error::ZBADVERSION + && code != Coordination::Error::ZNONODE) + { + LOG_WARNING(log, "Unexpected error while removing processing node: {}", code); + chassert(false); + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +std::string S3QueueIFileMetadata::getNodeName(const std::string & path) +{ + /// Since with are dealing with paths in s3 which can have "/", + /// we cannot create a zookeeper node with the name equal to path. + /// Therefore we use a hash of the path as a node name. + + SipHash path_hash; + path_hash.update(path); + return toString(path_hash.get64()); +} + +S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( + const std::string & path, + const std::string & exception, + size_t retries) +{ + /// Create a metadata which will be stored in a node named as getNodeName(path). + + /// Since node name is just a hash we want to know to which file it corresponds, + /// so we keep "file_path" in nodes data. + /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by s3queue_tracked_file_ttl_sec. + /// "last_exception" is kept for introspection, should also be visible in system.s3queue_log if it is enabled. + /// "retries" is kept for retrying the processing enabled by s3queue_loading_retries. + NodeMetadata metadata; + metadata.file_path = path; + metadata.last_processed_timestamp = now(); + metadata.last_exception = exception; + metadata.retries = retries; + return metadata; +} + +std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor_id) +{ + /// Add information which will be useful for debugging just in case. 
+ Poco::JSON::Object json; + json.set("hostname", DNSResolver::instance().getHostName()); + json.set("processor_id", processor_id); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json, oss); + return oss.str(); +} + +bool S3QueueIFileMetadata::setProcessing() +{ + auto state = file_status->state.load(); + if (state == FileStatus::State::Processing + || state == FileStatus::State::Processed + || (state == FileStatus::State::Failed && file_status->retries >= max_loading_retries)) + { + LOG_TEST(log, "File {} has non-processable state `{}`", path, file_status->state.load()); + return false; + } + + /// An optimization for local parallel processing. + std::unique_lock processing_lock(file_status->processing_lock, std::defer_lock); + if (!processing_lock.try_lock()) + return {}; + + auto [success, file_state] = setProcessingImpl(); + if (success) + file_status->onProcessing(); + else + file_status->updateState(file_state); + + LOG_TEST(log, "File {} has state `{}`: will {}process (processing id version: {})", + path, file_state, success ? "" : "not ", + processing_id_version.has_value() ? toString(processing_id_version.value()) : "None"); + + return success; +} + +void S3QueueIFileMetadata::setProcessed() +{ + LOG_TRACE(log, "Setting file {} as processed (path: {})", path, processed_node_path); + + ProfileEvents::increment(ProfileEvents::S3QueueProcessedFiles); + file_status->onProcessed(); + setProcessedImpl(); + + processing_id.reset(); + processing_id_version.reset(); + + LOG_TRACE(log, "Set file {} as processed (rows: {})", path, file_status->processed_rows); +} + +void S3QueueIFileMetadata::setFailed(const std::string & exception) +{ + LOG_TRACE(log, "Setting file {} as failed (exception: {}, path: {})", path, exception, failed_node_path); + + ProfileEvents::increment(ProfileEvents::S3QueueFailedFiles); + file_status->onFailed(exception); + node_metadata.last_exception = exception; + + if (max_loading_retries == 0) + setFailedNonRetriable(); + else + setFailedRetriable(); + + processing_id.reset(); + processing_id_version.reset(); + + LOG_TRACE(log, "Set file {} as failed (rows: {})", path, file_status->processed_rows); +} + +void S3QueueIFileMetadata::setFailedNonRetriable() +{ + auto zk_client = getZooKeeper(); + Coordination::Requests requests; + requests.push_back(zkutil::makeCreateRequest(failed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); + requests.push_back(zkutil::makeRemoveRequest(processing_node_path, -1)); + + Coordination::Responses responses; + const auto code = zk_client->tryMulti(requests, responses); + if (code == Coordination::Error::ZOK) + { + LOG_TRACE(log, "File `{}` failed to process and will not be retried. ", path); + return; + } + + if (Coordination::isHardwareError(responses[0]->error)) + { + LOG_WARNING(log, "Cannot set file as failed: lost connection to keeper"); + return; + } + + if (responses[0]->error == Coordination::Error::ZNODEEXISTS) + { + LOG_WARNING(log, "Cannot create a persistent node in /failed since it already exists"); + chassert(false); + return; + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error while setting file as failed: {}", code); +} + +void S3QueueIFileMetadata::setFailedRetriable() +{ + /// Instead of creating a persistent /failed/node_hash node + /// we create a persistent /failed/node_hash.retriable node. 
+ /// This allows us to make less zookeeper requests as we avoid checking + /// the number of already done retries in trySetFileAsProcessing. + + auto retrieable_failed_node_path = failed_node_path + ".retriable"; + auto zk_client = getZooKeeper(); + + /// Extract the number of already done retries from node_hash.retriable node if it exists. + Coordination::Stat stat; + std::string res; + if (zk_client->tryGet(retrieable_failed_node_path, res, &stat)) + { + auto failed_node_metadata = NodeMetadata::fromString(res); + node_metadata.retries = failed_node_metadata.retries + 1; + file_status->retries = node_metadata.retries; + } + + LOG_TRACE(log, "File `{}` failed to process, try {}/{}", + path, node_metadata.retries, max_loading_retries); + + Coordination::Requests requests; + if (node_metadata.retries >= max_loading_retries) + { + /// File is no longer retriable. + /// Make a persistent node /failed/node_hash, + /// remove /failed/node_hash.retriable node and node in /processing. + + requests.push_back(zkutil::makeRemoveRequest(processing_node_path, -1)); + requests.push_back(zkutil::makeRemoveRequest(retrieable_failed_node_path, stat.version)); + requests.push_back( + zkutil::makeCreateRequest( + failed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); + + } + else + { + /// File is still retriable, + /// update retries count and remove node from /processing. + + requests.push_back(zkutil::makeRemoveRequest(processing_node_path, -1)); + if (node_metadata.retries == 0) + { + requests.push_back( + zkutil::makeCreateRequest( + retrieable_failed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); + } + else + { + requests.push_back( + zkutil::makeSetRequest( + retrieable_failed_node_path, node_metadata.toString(), stat.version)); + } + } + + Coordination::Responses responses; + auto code = zk_client->tryMulti(requests, responses); + if (code == Coordination::Error::ZOK) + return; + + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Failed to set file {} as failed (code: {})", path, code); +} + +} diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.h b/src/Storages/S3Queue/S3QueueIFileMetadata.h new file mode 100644 index 00000000000..e0b0d16cbcc --- /dev/null +++ b/src/Storages/S3Queue/S3QueueIFileMetadata.h @@ -0,0 +1,114 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class S3QueueIFileMetadata +{ +public: + struct FileStatus + { + enum class State : uint8_t + { + Processing, + Processed, + Failed, + None + }; + + void onProcessing(); + void onProcessed(); + void onFailed(const std::string & exception); + void updateState(State state_) { state = state_; } + + std::string getException() const; + + std::mutex processing_lock; + + std::atomic state = State::None; + std::atomic processed_rows = 0; + std::atomic processing_start_time = 0; + std::atomic processing_end_time = 0; + std::atomic retries = 0; + ProfileEvents::Counters profile_counters; + + private: + mutable std::mutex last_exception_mutex; + std::string last_exception; + }; + using FileStatusPtr = std::shared_ptr; + + explicit S3QueueIFileMetadata( + const std::string & path_, + const std::string & processing_node_path_, + const std::string & processed_node_path_, + const std::string & failed_node_path_, + FileStatusPtr file_status_, + size_t max_loading_retries_, + LoggerPtr log_); + + virtual ~S3QueueIFileMetadata(); + + bool setProcessing(); + void setProcessed(); + void setFailed(const std::string & exception); + + virtual void setProcessedAtStartRequests( + 
Coordination::Requests & requests, + const zkutil::ZooKeeperPtr & zk_client) = 0; + + FileStatusPtr getFileStatus() { return file_status; } + + struct NodeMetadata + { + std::string file_path; UInt64 last_processed_timestamp = 0; + std::string last_exception; + UInt64 retries = 0; + std::string processing_id; /// For ephemeral processing node. + + std::string toString() const; + static NodeMetadata fromString(const std::string & metadata_str); + }; + +protected: + virtual std::pair setProcessingImpl() = 0; + virtual void setProcessedImpl() = 0; + void setFailedNonRetriable(); + void setFailedRetriable(); + + const std::string path; + const std::string node_name; + const FileStatusPtr file_status; + const size_t max_loading_retries; + + const std::string processing_node_path; + const std::string processed_node_path; + const std::string failed_node_path; + + NodeMetadata node_metadata; + LoggerPtr log; + + /// processing node is ephemeral, so we cannot verify with it if + /// this node was created by a certain processor on a previous s3 queue processing stage, + /// because we could get a session expired in between the stages + /// and someone else could just create this processing node. + /// Therefore we also create a persistent processing node + /// which is updated on each creation of ephemeral processing node. + /// We use the version of this node to verify the version of the processing ephemeral node. + const std::string processing_node_id_path; + /// Id of the processor. + std::optional processing_id; + /// Version of the processing id persistent node. + std::optional processing_id_version; + + static std::string getNodeName(const std::string & path); + + static NodeMetadata createNodeMetadata(const std::string & path, const std::string & exception = {}, size_t retries = 0); + + static std::string getProcessorInfo(const std::string & processor_id); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueMetadata.cpp b/src/Storages/S3Queue/S3QueueMetadata.cpp new file mode 100644 index 00000000000..f4c8c5c5ef2 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueMetadata.cpp @@ -0,0 +1,485 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event S3QueueSetFileProcessingMicroseconds; + extern const Event S3QueueSetFileProcessedMicroseconds; + extern const Event S3QueueSetFileFailedMicroseconds; + extern const Event S3QueueFailedFiles; + extern const Event S3QueueProcessedFiles; + extern const Event S3QueueCleanupMaxSetSizeOrTTLMicroseconds; + extern const Event S3QueueLockLocalFileStatusesMicroseconds; +}; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int REPLICA_ALREADY_EXISTS; + extern const int INCOMPATIBLE_COLUMNS; +} + +namespace +{ + UInt64 getCurrentTime() + { + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + } + + size_t generateRescheduleInterval(size_t min, size_t max) + { + /// Use more or less random interval for unordered mode cleanup task. + /// So that distributed processing cleanup tasks would not schedule cleanup at the same time. 
+ pcg64 rng(randomSeed()); + return min + rng() % (max - min + 1); + } + + zkutil::ZooKeeperPtr getZooKeeper() + { + return Context::getGlobalContextInstance()->getZooKeeper(); + } +} + +class S3QueueMetadata::LocalFileStatuses +{ +public: + LocalFileStatuses() = default; + + FileStatuses getAll() const + { + auto lk = lock(); + return file_statuses; + } + + FileStatusPtr get(const std::string & filename, bool create) + { + auto lk = lock(); + auto it = file_statuses.find(filename); + if (it == file_statuses.end()) + { + if (create) + it = file_statuses.emplace(filename, std::make_shared()).first; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "File status for {} doesn't exist", filename); + } + return it->second; + } + + bool remove(const std::string & filename, bool if_exists) + { + auto lk = lock(); + auto it = file_statuses.find(filename); + if (it == file_statuses.end()) + { + if (if_exists) + return false; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "File status for {} doesn't exist", filename); + } + file_statuses.erase(it); + return true; + } + +private: + FileStatuses file_statuses; + mutable std::mutex mutex; + + std::unique_lock lock() const + { + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueLockLocalFileStatusesMicroseconds); + return std::unique_lock(mutex); + } +}; + +S3QueueMetadata::S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_) + : settings(settings_) + , zookeeper_path(zookeeper_path_) + , buckets_num(getBucketsNum(settings_)) + , log(getLogger("StorageS3Queue(" + zookeeper_path_.string() + ")")) + , local_file_statuses(std::make_shared()) +{ + if (settings.mode == S3QueueMode::UNORDERED + && (settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec)) + { + task = Context::getGlobalContextInstance()->getSchedulePool().createTask( + "S3QueueCleanupFunc", + [this] { cleanupThreadFunc(); }); + + task->activate(); + task->scheduleAfter( + generateRescheduleInterval( + settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); + } +} + +S3QueueMetadata::~S3QueueMetadata() +{ + shutdown(); +} + +void S3QueueMetadata::shutdown() +{ + shutdown_called = true; + if (task) + task->deactivate(); +} + +void S3QueueMetadata::checkSettings(const S3QueueSettings & settings_) const +{ + S3QueueTableMetadata::checkEquals(settings, settings_); +} + +S3QueueMetadata::FileStatusPtr S3QueueMetadata::getFileStatus(const std::string & path) +{ + return local_file_statuses->get(path, /* create */false); +} + +S3QueueMetadata::FileStatuses S3QueueMetadata::getFileStatuses() const +{ + return local_file_statuses->getAll(); +} + +S3QueueMetadata::FileMetadataPtr S3QueueMetadata::getFileMetadata( + const std::string & path, + S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info) +{ + auto file_status = local_file_statuses->get(path, /* create */true); + switch (settings.mode) + { + case S3QueueMode::ORDERED: + return std::make_shared( + zookeeper_path, + path, + file_status, + bucket_info, + buckets_num, + settings.s3queue_loading_retries, + log); + case S3QueueMode::UNORDERED: + return std::make_shared( + zookeeper_path, + path, + file_status, + settings.s3queue_loading_retries, + log); + } +} + +size_t S3QueueMetadata::getBucketsNum(const S3QueueSettings & settings) +{ + if (settings.s3queue_buckets) + return settings.s3queue_buckets; + if (settings.s3queue_processing_threads_num) + return settings.s3queue_processing_threads_num; + return 0; +} + +size_t 
S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings) +{ + if (settings.buckets) + return settings.buckets; + if (settings.processing_threads_num) + return settings.processing_threads_num; + return 0; +} + +bool S3QueueMetadata::useBucketsForProcessing() const +{ + return settings.mode == S3QueueMode::ORDERED && (buckets_num > 1); +} + +S3QueueMetadata::Bucket S3QueueMetadata::getBucketForPath(const std::string & path) const +{ + return S3QueueOrderedFileMetadata::getBucketForPath(path, buckets_num); +} + +S3QueueOrderedFileMetadata::BucketHolderPtr +S3QueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) +{ + return S3QueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor); +} + +void S3QueueMetadata::initialize( + const ConfigurationPtr & configuration, + const StorageInMemoryMetadata & storage_metadata) +{ + const auto metadata_from_table = S3QueueTableMetadata(*configuration, settings, storage_metadata); + const auto & columns_from_table = storage_metadata.getColumns(); + const auto table_metadata_path = zookeeper_path / "metadata"; + const auto metadata_paths = settings.mode == S3QueueMode::ORDERED + ? S3QueueOrderedFileMetadata::getMetadataPaths(buckets_num) + : S3QueueUnorderedFileMetadata::getMetadataPaths(); + + auto zookeeper = getZooKeeper(); + zookeeper->createAncestors(zookeeper_path); + + for (size_t i = 0; i < 1000; ++i) + { + if (zookeeper->exists(table_metadata_path)) + { + const auto metadata_from_zk = S3QueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); + const auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); + + metadata_from_table.checkEquals(metadata_from_zk); + if (columns_from_zk != columns_from_table) + { + throw Exception( + ErrorCodes::INCOMPATIBLE_COLUMNS, + "Table columns structure in ZooKeeper is different from local table structure. " + "Local columns:\n{}\nZookeeper columns:\n{}", + columns_from_table.toString(), columns_from_zk.toString()); + } + return; + } + + Coordination::Requests requests; + requests.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent)); + requests.emplace_back(zkutil::makeCreateRequest(table_metadata_path, metadata_from_table.toString(), zkutil::CreateMode::Persistent)); + + for (const auto & path : metadata_paths) + { + const auto zk_path = zookeeper_path / path; + requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); + } + + if (!settings.s3queue_last_processed_path.value.empty()) + getFileMetadata(settings.s3queue_last_processed_path)->setProcessedAtStartRequests(requests, zookeeper); + + Coordination::Responses responses; + auto code = zookeeper->tryMulti(requests, responses); + if (code == Coordination::Error::ZNODEEXISTS) + { + auto exception = zkutil::KeeperMultiException(code, requests, responses); + LOG_INFO(log, "Got code `{}` for path: {}. 
" + "It looks like the table {} was created by another server at the same moment, " + "will retry", code, exception.getPathForFirstFailedOp(), zookeeper_path.string()); + continue; + } + else if (code != Coordination::Error::ZOK) + zkutil::KeeperMultiException::check(code, requests, responses); + + return; + } + + throw Exception( + ErrorCodes::REPLICA_ALREADY_EXISTS, + "Cannot create table, because it is created concurrently every time or because " + "of wrong zookeeper path or because of logical error"); +} + +void S3QueueMetadata::cleanupThreadFunc() +{ + /// A background task is responsible for maintaining + /// settings.s3queue_tracked_files_limit and max_set_age settings for `unordered` processing mode. + + if (shutdown_called) + return; + + try + { + cleanupThreadFuncImpl(); + } + catch (...) + { + LOG_ERROR(log, "Failed to cleanup nodes in zookeeper: {}", getCurrentExceptionMessage(true)); + } + + if (shutdown_called) + return; + + task->scheduleAfter( + generateRescheduleInterval( + settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); +} + +void S3QueueMetadata::cleanupThreadFuncImpl() +{ + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueCleanupMaxSetSizeOrTTLMicroseconds); + const auto zk_client = getZooKeeper(); + const fs::path zookeeper_processed_path = zookeeper_path / "processed"; + const fs::path zookeeper_failed_path = zookeeper_path / "failed"; + const fs::path zookeeper_cleanup_lock_path = zookeeper_path / "cleanup_lock"; + + Strings processed_nodes; + auto code = zk_client->tryGetChildren(zookeeper_processed_path, processed_nodes); + if (code != Coordination::Error::ZOK) + { + if (code == Coordination::Error::ZNONODE) + { + LOG_TEST(log, "Path {} does not exist", zookeeper_processed_path.string()); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", magic_enum::enum_name(code)); + } + + Strings failed_nodes; + code = zk_client->tryGetChildren(zookeeper_failed_path, failed_nodes); + if (code != Coordination::Error::ZOK) + { + if (code == Coordination::Error::ZNONODE) + { + LOG_TEST(log, "Path {} does not exist", zookeeper_failed_path.string()); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", magic_enum::enum_name(code)); + } + + const size_t nodes_num = processed_nodes.size() + failed_nodes.size(); + if (!nodes_num) + { + LOG_TEST(log, "There are neither processed nor failed nodes (in {} and in {})", + zookeeper_processed_path.string(), zookeeper_failed_path.string()); + return; + } + + chassert(settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec); + const bool check_nodes_limit = settings.s3queue_tracked_files_limit > 0; + const bool check_nodes_ttl = settings.s3queue_tracked_file_ttl_sec > 0; + + const bool nodes_limit_exceeded = nodes_num > settings.s3queue_tracked_files_limit; + if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl) + { + LOG_TEST(log, "No limit exceeded"); + return; + } + + LOG_TRACE(log, "Will check limits for {} nodes", nodes_num); + + /// Create a lock so that with distributed processing + /// multiple nodes do not execute cleanup in parallel. + auto ephemeral_node = zkutil::EphemeralNodeHolder::tryCreate(zookeeper_cleanup_lock_path, *zk_client, toString(getCurrentTime())); + if (!ephemeral_node) + { + LOG_TEST(log, "Cleanup is already being executed by another node"); + return; + } + /// TODO because of this lock we might not update local file statuses on time on one of the nodes. 
+ + struct Node + { + std::string zk_path; + S3QueueIFileMetadata::NodeMetadata metadata; + }; + auto node_cmp = [](const Node & a, const Node & b) + { + return std::tie(a.metadata.last_processed_timestamp, a.metadata.file_path) + < std::tie(b.metadata.last_processed_timestamp, b.metadata.file_path); + }; + + /// Ordered in ascending order of timestamps. + std::set sorted_nodes(node_cmp); + + auto fetch_nodes = [&](const Strings & nodes, const fs::path & base_path) + { + for (const auto & node : nodes) + { + const std::string path = base_path / node; + try + { + std::string metadata_str; + if (zk_client->tryGet(path, metadata_str)) + { + sorted_nodes.emplace(path, S3QueueIFileMetadata::NodeMetadata::fromString(metadata_str)); + LOG_TEST(log, "Fetched metadata for node {}", path); + } + else + LOG_ERROR(log, "Failed to fetch node metadata {}", path); + } + catch (const zkutil::KeeperException & e) + { + if (!Coordination::isHardwareError(e.code)) + { + LOG_WARNING(log, "Unexpected exception: {}", getCurrentExceptionMessage(true)); + chassert(false); + } + + /// Will retry with a new zk connection. + throw; + } + } + }; + + fetch_nodes(processed_nodes, zookeeper_processed_path); + fetch_nodes(failed_nodes, zookeeper_failed_path); + + auto get_nodes_str = [&]() + { + WriteBufferFromOwnString wb; + for (const auto & [node, metadata] : sorted_nodes) + wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp); + return wb.str(); + }; + LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.s3queue_tracked_files_limit, settings.s3queue_tracked_file_ttl_sec, get_nodes_str()); + + size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.s3queue_tracked_files_limit : 0; + for (const auto & node : sorted_nodes) + { + if (nodes_to_remove) + { + LOG_TRACE(log, "Removing node at path {} ({}) because max files limit is reached", + node.metadata.file_path, node.zk_path); + + local_file_statuses->remove(node.metadata.file_path, /* if_exists */true); + + code = zk_client->tryRemove(node.zk_path); + if (code == Coordination::Error::ZOK) + --nodes_to_remove; + else + LOG_ERROR(log, "Failed to remove a node `{}` (code: {})", node.zk_path, code); + } + else if (check_nodes_ttl) + { + UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp; + if (node_age >= settings.s3queue_tracked_file_ttl_sec) + { + LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached", + node.metadata.file_path, node.zk_path); + + local_file_statuses->remove(node.metadata.file_path, /* if_exists */true); + + code = zk_client->tryRemove(node.zk_path); + if (code != Coordination::Error::ZOK) + LOG_ERROR(log, "Failed to remove a node `{}` (code: {})", node.zk_path, code); + } + else if (!nodes_to_remove) + { + /// Nodes limit satisfied. + /// Nodes ttl satisfied as well as if current node is under tll, then all remaining as well + /// (because we are iterating in timestamp ascending order). + break; + } + } + else + { + /// Nodes limit and ttl are satisfied. 
+ break; + } + } + + LOG_TRACE(log, "Node limits check finished"); +} + +} diff --git a/src/Storages/S3Queue/S3QueueMetadata.h b/src/Storages/S3Queue/S3QueueMetadata.h new file mode 100644 index 00000000000..ef4a9808c68 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueMetadata.h @@ -0,0 +1,95 @@ +#pragma once +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; +namespace Poco { class Logger; } + +namespace DB +{ +struct S3QueueSettings; +class StorageS3Queue; +struct S3QueueTableMetadata; +struct StorageInMemoryMetadata; +using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + +/** + * A class for managing S3Queue metadata in zookeeper, e.g. + * the following folders: + * - /processed + * - /processing + * - /failed + * + * In case we use buckets for processing for Ordered mode, the structure looks like: + * - /buckets//processed -- persistent node, information about last processed file. + * - /buckets//lock -- ephemeral node, used for acquiring bucket lock. + * - /processing + * - /failed + * + * Depending on S3Queue processing mode (ordered or unordered) + * we can differently store metadata in /processed node. + * + * Implements caching of zookeeper metadata for faster responses. + * Cached part is located in LocalFileStatuses. + * + * In case of Unordered mode - if files TTL is enabled or maximum tracked files limit is set + * starts a background cleanup thread which is responsible for maintaining them. + */ +class S3QueueMetadata +{ +public: + using FileStatus = S3QueueIFileMetadata::FileStatus; + using FileMetadataPtr = std::shared_ptr; + using FileStatusPtr = std::shared_ptr; + using FileStatuses = std::unordered_map; + using Bucket = size_t; + using Processor = std::string; + + S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_); + ~S3QueueMetadata(); + + void initialize(const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata); + void checkSettings(const S3QueueSettings & settings) const; + void shutdown(); + + FileMetadataPtr getFileMetadata(const std::string & path, S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info = {}); + + FileStatusPtr getFileStatus(const std::string & path); + FileStatuses getFileStatuses() const; + + /// Method of Ordered mode parallel processing. 
+ bool useBucketsForProcessing() const; + Bucket getBucketForPath(const std::string & path) const; + S3QueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor); + + static size_t getBucketsNum(const S3QueueSettings & settings); + static size_t getBucketsNum(const S3QueueTableMetadata & settings); + +private: + void cleanupThreadFunc(); + void cleanupThreadFuncImpl(); + + const S3QueueSettings settings; + const fs::path zookeeper_path; + const size_t buckets_num; + + bool initialized = false; + LoggerPtr log; + + std::atomic_bool shutdown_called = false; + BackgroundSchedulePool::TaskHolder task; + + class LocalFileStatuses; + std::shared_ptr local_file_statuses; +}; + +} diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp b/src/Storages/S3Queue/S3QueueMetadataFactory.cpp index 92cdab6355d..a319b21ca3e 100644 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp +++ b/src/Storages/S3Queue/S3QueueMetadataFactory.cpp @@ -21,16 +21,13 @@ S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3 auto it = metadata_by_path.find(zookeeper_path); if (it == metadata_by_path.end()) { - it = metadata_by_path.emplace(zookeeper_path, std::make_shared(fs::path(zookeeper_path), settings)).first; - } - else if (it->second.metadata->checkSettings(settings)) - { - it->second.ref_count += 1; + auto files_metadata = std::make_shared(zookeeper_path, settings); + it = metadata_by_path.emplace(zookeeper_path, std::move(files_metadata)).first; } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata with the same `s3queue_zookeeper_path` " - "was already created but with different settings"); + it->second.metadata->checkSettings(settings); + it->second.ref_count += 1; } return it->second.metadata; } diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.h b/src/Storages/S3Queue/S3QueueMetadataFactory.h index c5e94d59050..80e96f8aa7e 100644 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.h +++ b/src/Storages/S3Queue/S3QueueMetadataFactory.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include namespace DB { @@ -9,7 +9,7 @@ namespace DB class S3QueueMetadataFactory final : private boost::noncopyable { public: - using FilesMetadataPtr = std::shared_ptr; + using FilesMetadataPtr = std::shared_ptr; static S3QueueMetadataFactory & instance(); @@ -22,9 +22,9 @@ public: private: struct Metadata { - explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} + explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} - std::shared_ptr metadata; + std::shared_ptr metadata; /// TODO: the ref count should be kept in keeper, because of the case with distributed processing. 
size_t ref_count = 0; }; diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp new file mode 100644 index 00000000000..d1298b8c4fa --- /dev/null +++ b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp @@ -0,0 +1,414 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + S3QueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num) + { + return sipHash64(path) % buckets_num; + } + + std::string getProcessedPathForBucket(const std::filesystem::path & zk_path, size_t bucket) + { + return zk_path / "buckets" / toString(bucket) / "processed"; + } + + std::string getProcessedPath(const std::filesystem::path & zk_path, const std::string & path, size_t buckets_num) + { + if (buckets_num > 1) + return getProcessedPathForBucket(zk_path, getBucketForPathImpl(path, buckets_num)); + else + return zk_path / "processed"; + } + + zkutil::ZooKeeperPtr getZooKeeper() + { + return Context::getGlobalContextInstance()->getZooKeeper(); + } +} + +S3QueueOrderedFileMetadata::BucketHolder::BucketHolder( + const Bucket & bucket_, + int bucket_version_, + const std::string & bucket_lock_path_, + const std::string & bucket_lock_id_path_, + zkutil::ZooKeeperPtr zk_client_) + : bucket_info(std::make_shared(BucketInfo{ + .bucket = bucket_, + .bucket_version = bucket_version_, + .bucket_lock_path = bucket_lock_path_, + .bucket_lock_id_path = bucket_lock_id_path_})) + , zk_client(zk_client_) +{ +} + +void S3QueueOrderedFileMetadata::BucketHolder::release() +{ + if (released) + return; + + released = true; + LOG_TEST(getLogger("S3QueueBucketHolder"), "Releasing bucket {}", bucket_info->bucket); + + Coordination::Requests requests; + /// Check that bucket lock version has not changed + /// (which could happen if session had expired as bucket_lock_path is ephemeral node). + requests.push_back(zkutil::makeCheckRequest(bucket_info->bucket_lock_id_path, bucket_info->bucket_version)); + /// Remove bucket lock. + requests.push_back(zkutil::makeRemoveRequest(bucket_info->bucket_lock_path, -1)); + + Coordination::Responses responses; + const auto code = zk_client->tryMulti(requests, responses); + zkutil::KeeperMultiException::check(code, requests, responses); +} + +S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() +{ + try + { + release(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( + const std::filesystem::path & zk_path_, + const std::string & path_, + FileStatusPtr file_status_, + BucketInfoPtr bucket_info_, + size_t buckets_num_, + size_t max_loading_retries_, + LoggerPtr log_) + : S3QueueIFileMetadata( + path_, + /* processing_node_path */zk_path_ / "processing" / getNodeName(path_), + /* processed_node_path */getProcessedPath(zk_path_, path_, buckets_num_), + /* failed_node_path */zk_path_ / "failed" / getNodeName(path_), + file_status_, + max_loading_retries_, + log_) + , buckets_num(buckets_num_) + , zk_path(zk_path_) + , bucket_info(bucket_info_) +{ +} + +std::vector S3QueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num) +{ + if (buckets_num > 1) + { + std::vector paths{"buckets", "failed", "processing"}; + for (size_t i = 0; i < buckets_num; ++i) + paths.push_back("buckets/" + toString(i)); + return paths; + } + else + return {"failed", "processing"}; +} + +bool S3QueueOrderedFileMetadata::getMaxProcessedFile( + NodeMetadata & result, + Coordination::Stat * stat, + const zkutil::ZooKeeperPtr & zk_client) +{ + return getMaxProcessedFile(result, stat, processed_node_path, zk_client); +} + +bool S3QueueOrderedFileMetadata::getMaxProcessedFile( + NodeMetadata & result, + Coordination::Stat * stat, + const std::string & processed_node_path_, + const zkutil::ZooKeeperPtr & zk_client) +{ + std::string data; + if (zk_client->tryGet(processed_node_path_, data, stat)) + { + if (!data.empty()) + result = NodeMetadata::fromString(data); + return true; + } + return false; +} + +S3QueueOrderedFileMetadata::Bucket S3QueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num) +{ + return getBucketForPathImpl(path_, buckets_num); +} + +S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcquireBucket( + const std::filesystem::path & zk_path, + const Bucket & bucket, + const Processor & processor) +{ + const auto zk_client = getZooKeeper(); + const auto bucket_lock_path = zk_path / "buckets" / toString(bucket) / "lock"; + const auto bucket_lock_id_path = zk_path / "buckets" / toString(bucket) / "lock_id"; + const auto processor_info = getProcessorInfo(processor); + + Coordination::Requests requests; + + /// Create bucket lock node as ephemeral node. + requests.push_back(zkutil::makeCreateRequest(bucket_lock_path, "", zkutil::CreateMode::Ephemeral)); + + /// Create bucket lock id node as persistent node if it does not exist yet. + requests.push_back( + zkutil::makeCreateRequest( + bucket_lock_id_path, processor_info, zkutil::CreateMode::Persistent, /* ignore_if_exists */true)); + + /// Update bucket lock id path. We use its version as a version of ephemeral bucket lock node. + /// (See comment near S3QueueIFileMetadata::processing_node_version). 
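A compact sketch of the path-to-bucket mapping defined by getBucketForPathImpl() and getProcessedPath() near the top of this file: each file hashes to one of buckets_num buckets, and the per-bucket "max processed" node lives under buckets/<N>/processed. Illustration only; std::hash stands in for sipHash64 and the sample zookeeper path is invented.

#include <cstddef>
#include <filesystem>
#include <functional>
#include <iostream>
#include <string>

int main()
{
    const std::filesystem::path zk_path = "/clickhouse/s3queue/some_table";
    const size_t buckets_num = 4;

    const std::string files[] = {"data/a.csv", "data/b.csv", "data/c.csv"};
    for (const auto & file : files)
    {
        size_t bucket = std::hash<std::string>{}(file) % buckets_num;
        auto processed_node = buckets_num > 1
            ? zk_path / "buckets" / std::to_string(bucket) / "processed"
            : zk_path / "processed";
        std::cout << file << " -> bucket " << bucket << " -> " << processed_node << '\n';
    }
}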
+ requests.push_back(zkutil::makeSetRequest(bucket_lock_id_path, processor_info, -1)); + + Coordination::Responses responses; + const auto code = zk_client->tryMulti(requests, responses); + if (code == Coordination::Error::ZOK) + { + const auto * set_response = dynamic_cast(responses[2].get()); + const auto bucket_lock_version = set_response->stat.version; + + LOG_TEST( + getLogger("S3QueueOrderedFileMetadata"), + "Processor {} acquired bucket {} for processing (bucket lock version: {})", + processor, bucket, bucket_lock_version); + + return std::make_shared( + bucket, + bucket_lock_version, + bucket_lock_path, + bucket_lock_id_path, + zk_client); + } + + if (code == Coordination::Error::ZNODEEXISTS) + return nullptr; + + if (Coordination::isHardwareError(code)) + return nullptr; + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", code); +} + +std::pair S3QueueOrderedFileMetadata::setProcessingImpl() +{ + /// In one zookeeper transaction do the following: + enum RequestType + { + /// node_name is not within failed persistent nodes + FAILED_PATH_DOESNT_EXIST = 0, + /// node_name ephemeral processing node was successfully created + CREATED_PROCESSING_PATH = 2, + /// update processing id + SET_PROCESSING_ID = 4, + /// bucket version did not change + CHECKED_BUCKET_VERSION = 5, + /// max_processed_node version did not change + CHECKED_MAX_PROCESSED_PATH = 6, + }; + + const auto zk_client = getZooKeeper(); + processing_id = node_metadata.processing_id = getRandomASCIIString(10); + auto processor_info = getProcessorInfo(processing_id.value()); + + while (true) + { + NodeMetadata processed_node; + Coordination::Stat processed_node_stat; + bool has_processed_node = getMaxProcessedFile(processed_node, &processed_node_stat, zk_client); + if (has_processed_node) + { + LOG_TEST(log, "Current max processed file {} from path: {}", + processed_node.file_path, processed_node_path); + + if (!processed_node.file_path.empty() && path <= processed_node.file_path) + { + return {false, FileStatus::State::Processed}; + } + } + + Coordination::Requests requests; + requests.push_back(zkutil::makeCreateRequest(failed_node_path, "", zkutil::CreateMode::Persistent)); + requests.push_back(zkutil::makeRemoveRequest(failed_node_path, -1)); + requests.push_back(zkutil::makeCreateRequest(processing_node_path, node_metadata.toString(), zkutil::CreateMode::Ephemeral)); + + requests.push_back( + zkutil::makeCreateRequest( + processing_node_id_path, processor_info, zkutil::CreateMode::Persistent, /* ignore_if_exists */true)); + requests.push_back(zkutil::makeSetRequest(processing_node_id_path, processor_info, -1)); + + if (bucket_info) + requests.push_back(zkutil::makeCheckRequest(bucket_info->bucket_lock_id_path, bucket_info->bucket_version)); + + /// TODO: for ordered processing with buckets it should be enough to check only bucket lock version, + /// so may be remove creation and check for processing_node_id if bucket_info is set? 
+ + if (has_processed_node) + { + requests.push_back(zkutil::makeCheckRequest(processed_node_path, processed_node_stat.version)); + } + else + { + requests.push_back(zkutil::makeCreateRequest(processed_node_path, "", zkutil::CreateMode::Persistent)); + requests.push_back(zkutil::makeRemoveRequest(processed_node_path, -1)); + } + + Coordination::Responses responses; + const auto code = zk_client->tryMulti(requests, responses); + auto is_request_failed = [&](RequestType type) { return responses[type]->error != Coordination::Error::ZOK; }; + + if (code == Coordination::Error::ZOK) + { + const auto * set_response = dynamic_cast(responses[SET_PROCESSING_ID].get()); + processing_id_version = set_response->stat.version; + return {true, FileStatus::State::None}; + } + + if (is_request_failed(FAILED_PATH_DOESNT_EXIST)) + return {false, FileStatus::State::Failed}; + + if (is_request_failed(CREATED_PROCESSING_PATH)) + return {false, FileStatus::State::Processing}; + + if (bucket_info && is_request_failed(CHECKED_BUCKET_VERSION)) + { + LOG_TEST(log, "Version of bucket lock changed: {}. Will retry for file `{}`", code, path); + continue; + } + + if (is_request_failed(bucket_info ? CHECKED_MAX_PROCESSED_PATH : CHECKED_BUCKET_VERSION)) + { + LOG_TEST(log, "Version of max processed file changed: {}. Will retry for file `{}`", code, path); + continue; + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected response state: {}", code); + } +} + +void S3QueueOrderedFileMetadata::setProcessedAtStartRequests( + Coordination::Requests & requests, + const zkutil::ZooKeeperPtr & zk_client) +{ + if (buckets_num > 1) + { + for (size_t i = 0; i < buckets_num; ++i) + { + auto path = getProcessedPathForBucket(zk_path, i); + setProcessedRequests(requests, zk_client, path, /* ignore_if_exists */true); + } + } + else + { + setProcessedRequests(requests, zk_client, processed_node_path, /* ignore_if_exists */true); + } +} + +void S3QueueOrderedFileMetadata::setProcessedRequests( + Coordination::Requests & requests, + const zkutil::ZooKeeperPtr & zk_client, + const std::string & processed_node_path_, + bool ignore_if_exists) +{ + NodeMetadata processed_node; + Coordination::Stat processed_node_stat; + if (getMaxProcessedFile(processed_node, &processed_node_stat, processed_node_path_, zk_client)) + { + LOG_TEST(log, "Current max processed file: {}, condition less: {}", + processed_node.file_path, bool(path <= processed_node.file_path)); + + if (!processed_node.file_path.empty() && path <= processed_node.file_path) + { + LOG_TRACE(log, "File {} is already processed, current max processed file: {}", path, processed_node.file_path); + + if (ignore_if_exists) + return; + + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "File ({}) is already processed, while expected it not to be (path: {})", + path, processed_node_path_); + } + requests.push_back(zkutil::makeSetRequest(processed_node_path_, node_metadata.toString(), processed_node_stat.version)); + } + else + { + LOG_TEST(log, "Max processed file does not exist, creating at: {}", processed_node_path_); + requests.push_back(zkutil::makeCreateRequest(processed_node_path_, node_metadata.toString(), zkutil::CreateMode::Persistent)); + } + + if (processing_id_version.has_value()) + { + requests.push_back(zkutil::makeCheckRequest(processing_node_id_path, processing_id_version.value())); + requests.push_back(zkutil::makeRemoveRequest(processing_node_id_path, processing_id_version.value())); + requests.push_back(zkutil::makeRemoveRequest(processing_node_path, -1)); + } +} + +void 
S3QueueOrderedFileMetadata::setProcessedImpl() +{ + /// In one zookeeper transaction do the following: + enum RequestType + { + SET_MAX_PROCESSED_PATH = 0, + CHECK_PROCESSING_ID_PATH = 1, /// Optional. + REMOVE_PROCESSING_ID_PATH = 2, /// Optional. + REMOVE_PROCESSING_PATH = 3, /// Optional. + }; + + const auto zk_client = getZooKeeper(); + const auto node_metadata_str = node_metadata.toString(); + std::string failure_reason; + + while (true) + { + Coordination::Requests requests; + setProcessedRequests(requests, zk_client, processed_node_path, /* ignore_if_exists */false); + + Coordination::Responses responses; + auto is_request_failed = [&](RequestType type) { return responses[type]->error != Coordination::Error::ZOK; }; + + auto code = zk_client->tryMulti(requests, responses); + if (code == Coordination::Error::ZOK) + { + if (max_loading_retries) + zk_client->tryRemove(failed_node_path + ".retriable", -1); + return; + } + + if (Coordination::isHardwareError(code)) + failure_reason = "Lost connection to keeper"; + else if (is_request_failed(SET_MAX_PROCESSED_PATH)) + { + LOG_TRACE(log, "Cannot set file {} as processed. " + "Failed to update processed node: {}. " + "Will retry.", path, code); + continue; + } + else if (is_request_failed(CHECK_PROCESSING_ID_PATH)) + failure_reason = "Version of processing id node changed"; + else if (is_request_failed(REMOVE_PROCESSING_PATH)) + failure_reason = "Failed to remove processing path"; + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state of zookeeper transaction: {}", code); + + LOG_WARNING(log, "Cannot set file {} as processed: {}. Reason: {}", path, code, failure_reason); + return; + } +} + +} diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h new file mode 100644 index 00000000000..698ec0f54cc --- /dev/null +++ b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h @@ -0,0 +1,97 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + +class S3QueueOrderedFileMetadata : public S3QueueIFileMetadata +{ +public: + using Processor = std::string; + using Bucket = size_t; + struct BucketInfo + { + Bucket bucket; + int bucket_version; + std::string bucket_lock_path; + std::string bucket_lock_id_path; + }; + using BucketInfoPtr = std::shared_ptr; + + explicit S3QueueOrderedFileMetadata( + const std::filesystem::path & zk_path_, + const std::string & path_, + FileStatusPtr file_status_, + BucketInfoPtr bucket_info_, + size_t buckets_num_, + size_t max_loading_retries_, + LoggerPtr log_); + + struct BucketHolder; + using BucketHolderPtr = std::shared_ptr; + + static BucketHolderPtr tryAcquireBucket( + const std::filesystem::path & zk_path, + const Bucket & bucket, + const Processor & processor); + + static S3QueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); + + static std::vector getMetadataPaths(size_t buckets_num); + + void setProcessedAtStartRequests( + Coordination::Requests & requests, + const zkutil::ZooKeeperPtr & zk_client) override; + +private: + const size_t buckets_num; + const std::string zk_path; + const BucketInfoPtr bucket_info; + + std::pair setProcessingImpl() override; + void setProcessedImpl() override; + + bool getMaxProcessedFile( + NodeMetadata & result, + Coordination::Stat * stat, + const zkutil::ZooKeeperPtr & zk_client); + + bool getMaxProcessedFile( + NodeMetadata & result, + Coordination::Stat * stat, + const std::string & processed_node_path_, + const zkutil::ZooKeeperPtr & 
zk_client); + + void setProcessedRequests( + Coordination::Requests & requests, + const zkutil::ZooKeeperPtr & zk_client, + const std::string & processed_node_path_, + bool ignore_if_exists); +}; + +struct S3QueueOrderedFileMetadata::BucketHolder +{ + BucketHolder( + const Bucket & bucket_, + int bucket_version_, + const std::string & bucket_lock_path_, + const std::string & bucket_lock_id_path_, + zkutil::ZooKeeperPtr zk_client_); + + ~BucketHolder(); + + Bucket getBucket() const { return bucket_info->bucket; } + BucketInfoPtr getBucketInfo() const { return bucket_info; } + + void release(); + +private: + BucketInfoPtr bucket_info; + const zkutil::ZooKeeperPtr zk_client; + bool released = false; +}; + +} diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h index c26e973a1c0..c486a7fbb5d 100644 --- a/src/Storages/S3Queue/S3QueueSettings.h +++ b/src/Storages/S3Queue/S3QueueSettings.h @@ -13,7 +13,7 @@ class ASTStorage; #define S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ M(S3QueueMode, \ mode, \ - S3QueueMode::ORDERED, \ + S3QueueMode::UNORDERED, \ "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." \ "With ordered mode, only the max name of the successfully consumed file stored.", \ 0) \ @@ -30,8 +30,7 @@ class ASTStorage; M(UInt32, s3queue_tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ M(UInt32, s3queue_cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ M(UInt32, s3queue_cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ - M(UInt32, s3queue_total_shards_num, 1, "Value 0 means disabled", 0) \ - M(UInt32, s3queue_current_shard_num, 0, "", 0) \ + M(UInt32, s3queue_buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ #define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 19c69d5c589..d8633037ed9 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -1,6 +1,5 @@ #include "config.h" -#if USE_AWS_S3 #include #include #include @@ -8,6 +7,7 @@ #include #include #include +#include namespace CurrentMetrics @@ -31,127 +31,25 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -StorageS3QueueSource::S3QueueKeyWithInfo::S3QueueKeyWithInfo( - const std::string & key_, - std::optional info_, - Metadata::ProcessingNodeHolderPtr processing_holder_) - : StorageS3Source::KeyWithInfo(key_, info_) +StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( + const ObjectInfo & object_info, + Metadata::FileMetadataPtr processing_holder_) + : ObjectInfo(object_info.relative_path, object_info.metadata) , processing_holder(processing_holder_) { } StorageS3QueueSource::FileIterator::FileIterator( - std::shared_ptr metadata_, + std::shared_ptr metadata_, std::unique_ptr glob_iterator_, - size_t current_shard_, - std::atomic & shutdown_called_) - : metadata(metadata_) + std::atomic & shutdown_called_, + LoggerPtr logger_) + : StorageObjectStorageSource::IIterator("S3QueueIterator") + , metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) - , log(&Poco::Logger::get("StorageS3QueueSource")) - , sharded_processing(metadata->isShardedProcessing()) - , current_shard(current_shard_) + , log(logger_) { - if (sharded_processing) - { - for (const auto & 
id : metadata->getProcessingIdsForShard(current_shard)) - sharded_keys.emplace(id, std::deque{}); - } -} - -StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(size_t idx) -{ - while (!shutdown_called) - { - KeyWithInfoPtr val{nullptr}; - - { - std::unique_lock lk(sharded_keys_mutex, std::defer_lock); - if (sharded_processing) - { - /// To make sure order on keys in each shard in sharded_keys - /// we need to check sharded_keys and to next() under lock. - lk.lock(); - - if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) - { - auto & keys = it->second; - if (!keys.empty()) - { - val = keys.front(); - keys.pop_front(); - chassert(idx == metadata->getProcessingIdForPath(val->key)); - } - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Processing id {} does not exist (Expected ids: {})", - idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); - } - } - - if (!val) - { - val = glob_iterator->next(); - if (val && sharded_processing) - { - const auto processing_id_for_key = metadata->getProcessingIdForPath(val->key); - if (idx != processing_id_for_key) - { - if (metadata->isProcessingIdBelongsToShard(processing_id_for_key, current_shard)) - { - LOG_TEST(log, "Putting key {} into queue of processor {} (total: {})", - val->key, processing_id_for_key, sharded_keys.size()); - - if (auto it = sharded_keys.find(processing_id_for_key); it != sharded_keys.end()) - { - it->second.push_back(val); - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Processing id {} does not exist (Expected ids: {})", - processing_id_for_key, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); - } - } - continue; - } - } - } - } - - if (!val) - return {}; - - if (shutdown_called) - { - LOG_TEST(log, "Shutdown was called, stopping file iterator"); - return {}; - } - - auto processing_holder = metadata->trySetFileAsProcessing(val->key); - if (shutdown_called) - { - LOG_TEST(log, "Shutdown was called, stopping file iterator"); - return {}; - } - - LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->key, idx); - - if (processing_holder) - { - return std::make_shared(val->key, val->info, processing_holder); - } - else if (sharded_processing - && metadata->getFileStatus(val->key)->state == S3QueueFilesMetadata::FileStatus::State::Processing) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "File {} is processing by someone else in sharded processing. 
" - "It is a bug", val->key); - } - } - return {}; } size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() @@ -159,12 +57,242 @@ size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method estimateKeysCount is not implemented"); } +StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) +{ + ObjectInfoPtr object_info; + S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info; + + while (!shutdown_called) + { + if (metadata->useBucketsForProcessing()) + std::tie(object_info, bucket_info) = getNextKeyFromAcquiredBucket(processor); + else + object_info = glob_iterator->next(processor); + + if (!object_info) + return {}; + + if (shutdown_called) + { + LOG_TEST(log, "Shutdown was called, stopping file iterator"); + return {}; + } + + auto file_metadata = metadata->getFileMetadata(object_info->relative_path, bucket_info); + if (file_metadata->setProcessing()) + return std::make_shared(*object_info, file_metadata); + } + return {}; +} + +std::pair +StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) +{ + /// We need this lock to maintain consistency between listing s3 directory + /// and getting/putting result into listed_keys_cache. + std::lock_guard lock(buckets_mutex); + + auto bucket_holder_it = bucket_holders.emplace(processor, nullptr).first; + auto current_processor = toString(processor); + + LOG_TEST( + log, "Current processor: {}, acquired bucket: {}", + processor, bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None"); + + while (true) + { + /// Each processing thread gets next path from glob_iterator->next() + /// and checks if corresponding bucket is already acquired by someone. + /// In case it is already acquired, they put the key into listed_keys_cache, + /// so that the thread who acquired the bucket will be able to see + /// those keys without the need to list s3 directory once again. + if (bucket_holder_it->second) + { + const auto bucket = bucket_holder_it->second->getBucket(); + auto it = listed_keys_cache.find(bucket); + if (it != listed_keys_cache.end()) + { + /// `bucket_keys` -- keys we iterated so far and which were not taken for processing. + /// `bucket_processor` -- processor id of the thread which has acquired the bucket. + auto & [bucket_keys, bucket_processor] = it->second; + + /// Check correctness just in case. + if (!bucket_processor.has_value()) + { + bucket_processor = current_processor; + } + else if (bucket_processor.value() != current_processor) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected current processor {} to be equal to {} for bucket {}", + current_processor, + bucket_processor.has_value() ? toString(bucket_processor.value()) : "None", + bucket); + } + + /// Take next key to process + if (!bucket_keys.empty()) + { + /// Take the key from the front, the order is important. + auto object_info = bucket_keys.front(); + bucket_keys.pop_front(); + + LOG_TEST(log, "Current bucket: {}, will process file: {}", + bucket, object_info->getFileName()); + + return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + } + + LOG_TEST(log, "Cache of bucket {} is empty", bucket); + + /// No more keys in bucket, remove it from cache. + listed_keys_cache.erase(it); + } + else + { + LOG_TEST(log, "Cache of bucket {} is empty", bucket); + } + + if (iterator_finished) + { + /// Bucket is fully processed - release the bucket. 
+ bucket_holder_it->second->release(); + bucket_holder_it->second.reset(); + } + } + /// If processing thread has already acquired some bucket + /// and while listing s3 directory gets a key which is in a different bucket, + /// it puts the key into listed_keys_cache to allow others to process it, + /// because one processing thread can acquire only one bucket at a time. + /// Once a thread is finished with its acquired bucket, it checks listed_keys_cache + /// to see if there are keys from buckets not acquired by anyone. + if (!bucket_holder_it->second) + { + for (auto it = listed_keys_cache.begin(); it != listed_keys_cache.end();) + { + auto & [bucket, bucket_info] = *it; + auto & [bucket_keys, bucket_processor] = bucket_info; + + LOG_TEST(log, "Bucket: {}, cached keys: {}, processor: {}", + bucket, bucket_keys.size(), bucket_processor.has_value() ? toString(bucket_processor.value()) : "None"); + + if (bucket_processor.has_value()) + { + LOG_TEST(log, "Bucket {} is already locked for processing by {} (keys: {})", + bucket, bucket_processor.value(), bucket_keys.size()); + ++it; + continue; + } + + if (bucket_keys.empty()) + { + /// No more keys in bucket, remove it from cache. + /// We still might add new keys to this bucket if !iterator_finished. + it = listed_keys_cache.erase(it); + continue; + } + + bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); + if (!bucket_holder_it->second) + { + LOG_TEST(log, "Bucket {} is already locked for processing (keys: {})", + bucket, bucket_keys.size()); + ++it; + continue; + } + + bucket_processor = current_processor; + + /// Take the key from the front, the order is important. + auto object_info = bucket_keys.front(); + bucket_keys.pop_front(); + + LOG_TEST(log, "Acquired bucket: {}, will process file: {}", + bucket, object_info->getFileName()); + + return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + } + } + + if (iterator_finished) + { + LOG_TEST(log, "Reached the end of file iterator and nothing left in keys cache"); + return {}; + } + + auto object_info = glob_iterator->next(processor); + if (object_info) + { + const auto bucket = metadata->getBucketForPath(object_info->relative_path); + auto & bucket_cache = listed_keys_cache[bucket]; + + LOG_TEST(log, "Found next file: {}, bucket: {}, current bucket: {}, cached_keys: {}", + object_info->getFileName(), bucket, + bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None", + bucket_cache.keys.size()); + + if (bucket_holder_it->second) + { + if (bucket_holder_it->second->getBucket() != bucket) + { + /// Acquired bucket differs from object's bucket, + /// put it into bucket's cache and continue. + bucket_cache.keys.emplace_back(object_info); + continue; + } + /// Bucket is already acquired, process the file. + return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + } + else + { + bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); + if (bucket_holder_it->second) + { + bucket_cache.processor = current_processor; + if (!bucket_cache.keys.empty()) + { + /// We have to maintain ordering between keys, + /// so if some keys are already in cache - start with them. 
+ bucket_cache.keys.emplace_back(object_info); + object_info = bucket_cache.keys.front(); + bucket_cache.keys.pop_front(); + } + return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + } + else + { + LOG_TEST(log, "Bucket {} is already locked for processing", bucket); + bucket_cache.keys.emplace_back(object_info); + continue; + } + } + } + else + { + if (bucket_holder_it->second) + { + bucket_holder_it->second->release(); + bucket_holder_it->second.reset(); + } + + LOG_TEST(log, "Reached the end of file iterator"); + iterator_finished = true; + + if (listed_keys_cache.empty()) + return {}; + else + continue; + } + } +} + StorageS3QueueSource::StorageS3QueueSource( String name_, + size_t processor_id_, const Block & header_, - std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - size_t processing_id_, + std::unique_ptr internal_source_, + std::shared_ptr files_metadata_, const S3QueueAction & action_, RemoveFileFunc remove_file_func_, const NamesAndTypesList & requested_virtual_columns_, @@ -177,8 +305,8 @@ StorageS3QueueSource::StorageS3QueueSource( : ISource(header_) , WithContext(context_) , name(std::move(name_)) + , processor_id(processor_id_) , action(action_) - , processing_id(processing_id_) , files_metadata(files_metadata_) , internal_source(std::move(internal_source_)) , requested_virtual_columns(requested_virtual_columns_) @@ -191,22 +319,17 @@ StorageS3QueueSource::StorageS3QueueSource( { } -StorageS3QueueSource::~StorageS3QueueSource() -{ - internal_source->create_reader_pool.wait(); -} - String StorageS3QueueSource::getName() const { return name; } -void StorageS3QueueSource::lazyInitialize() +void StorageS3QueueSource::lazyInitialize(size_t processor) { if (initialized) return; - internal_source->lazyInitialize(processing_id); + internal_source->lazyInitialize(processor); reader = std::move(internal_source->reader); if (reader) reader_future = std::move(internal_source->reader_future); @@ -215,15 +338,16 @@ void StorageS3QueueSource::lazyInitialize() Chunk StorageS3QueueSource::generate() { - lazyInitialize(); + lazyInitialize(processor_id); while (true) { if (!reader) break; - const auto * key_with_info = dynamic_cast(&reader.getKeyWithInfo()); - auto file_status = key_with_info->processing_holder->getFileStatus(); + const auto * object_info = dynamic_cast(&reader.getObjectInfo()); + auto file_metadata = object_info->processing_holder; + auto file_status = file_metadata->getFileStatus(); if (isCancelled()) { @@ -233,19 +357,22 @@ Chunk StorageS3QueueSource::generate() { try { - files_metadata->setFileFailed(key_with_info->processing_holder, "Cancelled"); + file_metadata->setFailed("Cancelled"); } catch (...) 
{ - tryLogCurrentException(__PRETTY_FUNCTION__); + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getObjectInfo().getPath(), *file_status, processed_rows_from_file, false); } break; } + const auto & path = reader.getObjectInfo().getPath(); + if (shutdown_called) { if (processed_rows_from_file == 0) @@ -255,18 +382,19 @@ Chunk StorageS3QueueSource::generate() { LOG_DEBUG( log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", - processed_rows_from_file, reader.getFile()); + processed_rows_from_file, path); try { - files_metadata->setFileFailed(key_with_info->processing_holder, "Table is dropped"); + file_metadata->setFailed("Table is dropped"); } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); /// Leave the file half processed. Table is being dropped, so we do not care. break; @@ -274,7 +402,7 @@ Chunk StorageS3QueueSource::generate() LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). " "Will process the file fully and then shutdown", - reader.getFile(), processed_rows_from_file); + path, processed_rows_from_file); } auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); @@ -288,30 +416,31 @@ Chunk StorageS3QueueSource::generate() Chunk chunk; if (reader->pull(chunk)) { - LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getPath()); + LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), path); file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getKeyWithInfo().info->size); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, requested_virtual_columns, path, reader.getObjectInfo().metadata->size_bytes); return chunk; } } catch (...) { const auto message = getCurrentExceptionMessage(true); - LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getFile(), message); + LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. 
Error: {} ", path, message); - files_metadata->setFileFailed(key_with_info->processing_holder, message); + file_metadata->setFailed(message); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); throw; } - files_metadata->setFileProcessed(key_with_info->processing_holder); - applyActionAfterProcessing(reader.getFile()); + file_metadata->setProcessed(); + applyActionAfterProcessing(reader.getObjectInfo().relative_path); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, true); + appendLogElement(path, *file_status, processed_rows_from_file, true); file_status.reset(); processed_rows_from_file = 0; @@ -327,12 +456,12 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - file_status = files_metadata->getFileStatus(reader.getFile()); + file_status = files_metadata->getFileStatus(reader.getObjectInfo().getPath()); /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool.wait(); - reader_future = internal_source->createReaderAsync(processing_id); + internal_source->create_reader_pool->wait(); + reader_future = internal_source->createReaderAsync(processor_id); } return {}; @@ -355,7 +484,7 @@ void StorageS3QueueSource::applyActionAfterProcessing(const String & path) void StorageS3QueueSource::appendLogElement( const std::string & filename, - S3QueueFilesMetadata::FileStatus & file_status_, + S3QueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed) { @@ -364,7 +493,6 @@ void StorageS3QueueSource::appendLogElement( S3QueueLogElement elem{}; { - std::lock_guard lock(file_status_.metadata_lock); elem = S3QueueLogElement { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), @@ -377,12 +505,10 @@ void StorageS3QueueSource::appendLogElement( .counters_snapshot = file_status_.profile_counters.getPartiallyAtomicSnapshot(), .processing_start_time = file_status_.processing_start_time, .processing_end_time = file_status_.processing_end_time, - .exception = file_status_.last_exception, + .exception = file_status_.getException(), }; } s3_queue_log->add(std::move(elem)); } } - -#endif diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 8fc7305ea08..6e098f8cb63 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -1,11 +1,11 @@ #pragma once #include "config.h" -#if USE_AWS_S3 #include #include -#include -#include +#include +#include +#include #include @@ -14,62 +14,78 @@ namespace Poco { class Logger; } namespace DB { +struct ObjectMetadata; + class StorageS3QueueSource : public ISource, WithContext { public: - using IIterator = StorageS3Source::IIterator; - using KeyWithInfoPtr = StorageS3Source::KeyWithInfoPtr; - using GlobIterator = StorageS3Source::DisclosedGlobIterator; + using Storage = StorageObjectStorage; + using ConfigurationPtr = Storage::ConfigurationPtr; + using GlobIterator = StorageObjectStorageSource::GlobIterator; using ZooKeeperGetter = std::function; using RemoveFileFunc = std::function; - using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; - using Metadata = S3QueueFilesMetadata; + using FileStatusPtr = S3QueueMetadata::FileStatusPtr; + using ReaderHolder = StorageObjectStorageSource::ReaderHolder; + using Metadata = S3QueueMetadata; + using ObjectInfo = StorageObjectStorageSource::ObjectInfo; 
+ using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; - struct S3QueueKeyWithInfo : public StorageS3Source::KeyWithInfo + struct S3QueueObjectInfo : public ObjectInfo { - S3QueueKeyWithInfo( - const std::string & key_, - std::optional info_, - Metadata::ProcessingNodeHolderPtr processing_holder_); + S3QueueObjectInfo( + const ObjectInfo & object_info, + Metadata::FileMetadataPtr processing_holder_); - Metadata::ProcessingNodeHolderPtr processing_holder; + Metadata::FileMetadataPtr processing_holder; }; - class FileIterator : public IIterator + class FileIterator : public StorageObjectStorageSource::IIterator { public: FileIterator( - std::shared_ptr metadata_, + std::shared_ptr metadata_, std::unique_ptr glob_iterator_, - size_t current_shard_, - std::atomic & shutdown_called_); + std::atomic & shutdown_called_, + LoggerPtr logger_); /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - KeyWithInfoPtr next(size_t idx) override; + ObjectInfoPtr nextImpl(size_t processor) override; size_t estimatedKeysCount() override; private: - const std::shared_ptr metadata; + using Bucket = S3QueueMetadata::Bucket; + using Processor = S3QueueMetadata::Processor; + + const std::shared_ptr metadata; const std::unique_ptr glob_iterator; + std::atomic & shutdown_called; std::mutex mutex; - Poco::Logger * log; + LoggerPtr log; - const bool sharded_processing; - const size_t current_shard; - std::unordered_map> sharded_keys; - std::mutex sharded_keys_mutex; + std::mutex buckets_mutex; + struct ListedKeys + { + std::deque keys; + std::optional processor; + }; + std::unordered_map listed_keys_cache; + bool iterator_finished = false; + std::unordered_map bucket_holders; + + std::pair getNextKeyFromAcquiredBucket(size_t processor); }; StorageS3QueueSource( String name_, + size_t processor_id_, const Block & header_, - std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - size_t processing_id_, + std::unique_ptr internal_source_, + std::shared_ptr files_metadata_, const S3QueueAction & action_, RemoveFileFunc remove_file_func_, const NamesAndTypesList & requested_virtual_columns_, @@ -80,8 +96,6 @@ public: const StorageID & storage_id_, LoggerPtr log_); - ~StorageS3QueueSource() override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); String getName() const override; @@ -90,10 +104,10 @@ public: private: const String name; + const size_t processor_id; const S3QueueAction action; - const size_t processing_id; - const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; + const std::shared_ptr files_metadata; + const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; @@ -103,16 +117,16 @@ private: RemoveFileFunc remove_file_func; LoggerPtr log; - using ReaderHolder = StorageS3Source::ReaderHolder; ReaderHolder reader; std::future reader_future; std::atomic initialized{false}; size_t processed_rows_from_file = 0; - void lazyInitialize(); + S3QueueOrderedFileMetadata::BucketHolderPtr current_bucket_holder; + void applyActionAfterProcessing(const String & path); - void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); + void appendLogElement(const std::string & filename, S3QueueMetadata::FileStatus & 
file_status_, size_t processed_rows, bool processed); + void lazyInitialize(size_t processor); }; } -#endif diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index 1830bac4743..ecaa7ad57cc 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -1,13 +1,12 @@ #include -#if USE_AWS_S3 - #include #include #include #include #include -#include +#include +#include namespace DB @@ -33,17 +32,17 @@ namespace S3QueueTableMetadata::S3QueueTableMetadata( - const StorageS3::Configuration & configuration, + const StorageObjectStorage::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { format_name = configuration.format; after_processing = engine_settings.after_processing.toString(); mode = engine_settings.mode.toString(); - s3queue_tracked_files_limit = engine_settings.s3queue_tracked_files_limit; - s3queue_tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; - s3queue_total_shards_num = engine_settings.s3queue_total_shards_num; - s3queue_processing_threads_num = engine_settings.s3queue_processing_threads_num; + tracked_files_limit = engine_settings.s3queue_tracked_files_limit; + tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; + buckets = engine_settings.s3queue_buckets; + processing_threads_num = engine_settings.s3queue_processing_threads_num; columns = storage_metadata.getColumns().toString(); } @@ -52,14 +51,15 @@ String S3QueueTableMetadata::toString() const Poco::JSON::Object json; json.set("after_processing", after_processing); json.set("mode", mode); - json.set("s3queue_tracked_files_limit", s3queue_tracked_files_limit); - json.set("s3queue_tracked_file_ttl_sec", s3queue_tracked_file_ttl_sec); - json.set("s3queue_total_shards_num", s3queue_total_shards_num); - json.set("s3queue_processing_threads_num", s3queue_processing_threads_num); + json.set("tracked_files_limit", tracked_files_limit); + json.set("tracked_file_ttl_sec", tracked_file_ttl_sec); + json.set("processing_threads_num", processing_threads_num); + json.set("buckets", buckets); json.set("format_name", format_name); json.set("columns", columns); + json.set("last_processed_file", last_processed_path); - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(json, oss); return oss.str(); @@ -72,20 +72,34 @@ void S3QueueTableMetadata::read(const String & metadata_str) after_processing = json->getValue("after_processing"); mode = json->getValue("mode"); - s3queue_tracked_files_limit = json->getValue("s3queue_tracked_files_limit"); - s3queue_tracked_file_ttl_sec = json->getValue("s3queue_tracked_file_ttl_sec"); + format_name = json->getValue("format_name"); columns = json->getValue("columns"); - if (json->has("s3queue_total_shards_num")) - s3queue_total_shards_num = json->getValue("s3queue_total_shards_num"); - else - s3queue_total_shards_num = 1; + /// Check with "s3queue_" prefix for compatibility. 
+ { + if (json->has("s3queue_tracked_files_limit")) + tracked_files_limit = json->getValue("s3queue_tracked_files_limit"); + if (json->has("s3queue_tracked_file_ttl_sec")) + tracked_file_ttl_sec = json->getValue("s3queue_tracked_file_ttl_sec"); + if (json->has("s3queue_processing_threads_num")) + processing_threads_num = json->getValue("s3queue_processing_threads_num"); + } - if (json->has("s3queue_processing_threads_num")) - s3queue_processing_threads_num = json->getValue("s3queue_processing_threads_num"); - else - s3queue_processing_threads_num = 1; + if (json->has("tracked_files_limit")) + tracked_files_limit = json->getValue("tracked_files_limit"); + + if (json->has("tracked_file_ttl_sec")) + tracked_file_ttl_sec = json->getValue("tracked_file_ttl_sec"); + + if (json->has("last_processed_file")) + last_processed_path = json->getValue("last_processed_file"); + + if (json->has("processing_threads_num")) + processing_threads_num = json->getValue("processing_threads_num"); + + if (json->has("buckets")) + buckets = json->getValue("buckets"); } S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) @@ -95,6 +109,11 @@ S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) return metadata; } +void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +{ + checkImmutableFieldsEquals(from_zk); +} + void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const { if (after_processing != from_zk.after_processing) @@ -113,21 +132,21 @@ void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata from_zk.mode, mode); - if (s3queue_tracked_files_limit != from_zk.s3queue_tracked_files_limit) + if (tracked_files_limit != from_zk.tracked_files_limit) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set size. " "Stored in ZooKeeper: {}, local: {}", - from_zk.s3queue_tracked_files_limit, - s3queue_tracked_files_limit); + from_zk.tracked_files_limit, + tracked_files_limit); - if (s3queue_tracked_file_ttl_sec != from_zk.s3queue_tracked_file_ttl_sec) + if (tracked_file_ttl_sec != from_zk.tracked_file_ttl_sec) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set age. " "Stored in ZooKeeper: {}, local: {}", - from_zk.s3queue_tracked_file_ttl_sec, - s3queue_tracked_file_ttl_sec); + from_zk.tracked_file_ttl_sec, + tracked_file_ttl_sec); if (format_name != from_zk.format_name) throw Exception( @@ -137,34 +156,97 @@ void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata from_zk.format_name, format_name); + if (last_processed_path != from_zk.last_processed_path) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in last processed path. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.last_processed_path, + last_processed_path); + if (modeFromString(mode) == S3QueueMode::ORDERED) { - if (s3queue_processing_threads_num != from_zk.s3queue_processing_threads_num) + if (buckets != from_zk.buckets) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_processing_threads_num setting. " + "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. 
" "Stored in ZooKeeper: {}, local: {}", - from_zk.s3queue_processing_threads_num, - s3queue_processing_threads_num); + from_zk.buckets, buckets); } - if (s3queue_total_shards_num != from_zk.s3queue_total_shards_num) + + if (S3QueueMetadata::getBucketsNum(*this) != S3QueueMetadata::getBucketsNum(from_zk)) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_total_shards_num setting. " + "Existing table metadata in ZooKeeper differs in processing buckets. " "Stored in ZooKeeper: {}, local: {}", - from_zk.s3queue_total_shards_num, - s3queue_total_shards_num); + S3QueueMetadata::getBucketsNum(*this), S3QueueMetadata::getBucketsNum(from_zk)); } } } -void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +void S3QueueTableMetadata::checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected) { - checkImmutableFieldsEquals(from_zk); -} + if (current.after_processing != expected.after_processing) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs " + "in action after processing. Stored in ZooKeeper: {}, local: {}", + expected.after_processing.toString(), + current.after_processing.toString()); -} + if (current.mode != expected.mode) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in engine mode. " + "Stored in ZooKeeper: {}, local: {}", + expected.mode.toString(), + current.mode.toString()); -#endif + if (current.s3queue_tracked_files_limit != expected.s3queue_tracked_files_limit) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set size. " + "Stored in ZooKeeper: {}, local: {}", + expected.s3queue_tracked_files_limit, + current.s3queue_tracked_files_limit); + + if (current.s3queue_tracked_file_ttl_sec != expected.s3queue_tracked_file_ttl_sec) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set age. " + "Stored in ZooKeeper: {}, local: {}", + expected.s3queue_tracked_file_ttl_sec, + current.s3queue_tracked_file_ttl_sec); + + if (current.s3queue_last_processed_path.value != expected.s3queue_last_processed_path.value) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in last_processed_path. " + "Stored in ZooKeeper: {}, local: {}", + expected.s3queue_last_processed_path.value, + current.s3queue_last_processed_path.value); + + if (current.mode == S3QueueMode::ORDERED) + { + if (current.s3queue_buckets != expected.s3queue_buckets) + { + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. " + "Stored in ZooKeeper: {}, local: {}", + expected.s3queue_buckets, current.s3queue_buckets); + } + + if (S3QueueMetadata::getBucketsNum(current) != S3QueueMetadata::getBucketsNum(expected)) + { + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in processing buckets. 
" + "Stored in ZooKeeper: {}, local: {}", + S3QueueMetadata::getBucketsNum(current), S3QueueMetadata::getBucketsNum(expected)); + } + } +} +} diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 84087f72a6a..d53b60570ae 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -1,9 +1,8 @@ #pragma once -#if USE_AWS_S3 - #include -#include +#include +#include #include namespace DB @@ -21,13 +20,17 @@ struct S3QueueTableMetadata String columns; String after_processing; String mode; - UInt64 s3queue_tracked_files_limit = 0; - UInt64 s3queue_tracked_file_ttl_sec = 0; - UInt64 s3queue_total_shards_num = 1; - UInt64 s3queue_processing_threads_num = 1; + UInt64 tracked_files_limit = 0; + UInt64 tracked_file_ttl_sec = 0; + UInt64 buckets = 0; + UInt64 processing_threads_num = 1; + String last_processed_path; S3QueueTableMetadata() = default; - S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); + S3QueueTableMetadata( + const StorageObjectStorage::Configuration & configuration, + const S3QueueSettings & engine_settings, + const StorageInMemoryMetadata & storage_metadata); void read(const String & metadata_str); static S3QueueTableMetadata parse(const String & metadata_str); @@ -35,6 +38,7 @@ struct S3QueueTableMetadata String toString() const; void checkEquals(const S3QueueTableMetadata & from_zk) const; + static void checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected); private: void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; @@ -42,5 +46,3 @@ private: } - -#endif diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp b/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp new file mode 100644 index 00000000000..c61e9557fc2 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp @@ -0,0 +1,155 @@ +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + zkutil::ZooKeeperPtr getZooKeeper() + { + return Context::getGlobalContextInstance()->getZooKeeper(); + } +} + +S3QueueUnorderedFileMetadata::S3QueueUnorderedFileMetadata( + const std::filesystem::path & zk_path, + const std::string & path_, + FileStatusPtr file_status_, + size_t max_loading_retries_, + LoggerPtr log_) + : S3QueueIFileMetadata( + path_, + /* processing_node_path */zk_path / "processing" / getNodeName(path_), + /* processed_node_path */zk_path / "processed" / getNodeName(path_), + /* failed_node_path */zk_path / "failed" / getNodeName(path_), + file_status_, + max_loading_retries_, + log_) +{ +} + +std::pair S3QueueUnorderedFileMetadata::setProcessingImpl() +{ + /// In one zookeeper transaction do the following: + enum RequestType + { + /// node_name is not within processed persistent nodes + PROCESSED_PATH_DOESNT_EXIST = 0, + /// node_name is not within failed persistent nodes + FAILED_PATH_DOESNT_EXIST = 2, + /// node_name ephemeral processing node was successfully created + CREATED_PROCESSING_PATH = 4, + /// update processing id + SET_PROCESSING_ID = 6, + }; + + const auto zk_client = getZooKeeper(); + processing_id = node_metadata.processing_id = getRandomASCIIString(10); + auto processor_info = getProcessorInfo(processing_id.value()); + + Coordination::Requests requests; + requests.push_back(zkutil::makeCreateRequest(processed_node_path, "", 
zkutil::CreateMode::Persistent)); + requests.push_back(zkutil::makeRemoveRequest(processed_node_path, -1)); + requests.push_back(zkutil::makeCreateRequest(failed_node_path, "", zkutil::CreateMode::Persistent)); + requests.push_back(zkutil::makeRemoveRequest(failed_node_path, -1)); + requests.push_back(zkutil::makeCreateRequest(processing_node_path, node_metadata.toString(), zkutil::CreateMode::Ephemeral)); + + requests.push_back( + zkutil::makeCreateRequest( + processing_node_id_path, processor_info, zkutil::CreateMode::Persistent, /* ignore_if_exists */true)); + requests.push_back(zkutil::makeSetRequest(processing_node_id_path, processor_info, -1)); + + Coordination::Responses responses; + const auto code = zk_client->tryMulti(requests, responses); + auto is_request_failed = [&](RequestType type) { return responses[type]->error != Coordination::Error::ZOK; }; + + if (code == Coordination::Error::ZOK) + { + const auto * set_response = dynamic_cast(responses[SET_PROCESSING_ID].get()); + processing_id_version = set_response->stat.version; + return std::pair{true, FileStatus::State::None}; + } + + if (is_request_failed(PROCESSED_PATH_DOESNT_EXIST)) + return {false, FileStatus::State::Processed}; + + if (is_request_failed(FAILED_PATH_DOESNT_EXIST)) + return {false, FileStatus::State::Failed}; + + if (is_request_failed(CREATED_PROCESSING_PATH)) + return {false, FileStatus::State::Processing}; + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state of zookeeper transaction: {}", magic_enum::enum_name(code)); +} + +void S3QueueUnorderedFileMetadata::setProcessedAtStartRequests( + Coordination::Requests & requests, + const zkutil::ZooKeeperPtr &) +{ + requests.push_back( + zkutil::makeCreateRequest( + processed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); +} + +void S3QueueUnorderedFileMetadata::setProcessedImpl() +{ + /// In one zookeeper transaction do the following: + enum RequestType + { + SET_MAX_PROCESSED_PATH = 0, + CHECK_PROCESSING_ID_PATH = 1, /// Optional. + REMOVE_PROCESSING_ID_PATH = 2, /// Optional. + REMOVE_PROCESSING_PATH = 3, /// Optional. 
+ }; + + const auto zk_client = getZooKeeper(); + std::string failure_reason; + + Coordination::Requests requests; + requests.push_back( + zkutil::makeCreateRequest( + processed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); + + if (processing_id_version.has_value()) + { + requests.push_back(zkutil::makeCheckRequest(processing_node_id_path, processing_id_version.value())); + requests.push_back(zkutil::makeRemoveRequest(processing_node_id_path, processing_id_version.value())); + requests.push_back(zkutil::makeRemoveRequest(processing_node_path, -1)); + } + + Coordination::Responses responses; + auto is_request_failed = [&](RequestType type) { return responses[type]->error != Coordination::Error::ZOK; }; + + const auto code = zk_client->tryMulti(requests, responses); + if (code == Coordination::Error::ZOK) + { + if (max_loading_retries) + zk_client->tryRemove(failed_node_path + ".retriable", -1); + + LOG_TRACE(log, "Moved file `{}` to processed (node path: {})", path, processed_node_path); + return; + } + + if (Coordination::isHardwareError(code)) + failure_reason = "Lost connection to keeper"; + else if (is_request_failed(SET_MAX_PROCESSED_PATH)) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot create a persistent node in /processed since it already exists"); + else if (is_request_failed(CHECK_PROCESSING_ID_PATH)) + failure_reason = "Version of processing id node changed"; + else if (is_request_failed(REMOVE_PROCESSING_PATH)) + failure_reason = "Failed to remove processing path"; + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state of zookeeper transaction: {}", code); + + LOG_WARNING(log, "Cannot set file {} as processed: {}. Reason: {}", path, code, failure_reason); +} + +} diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h b/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h new file mode 100644 index 00000000000..24c2765bf3a --- /dev/null +++ b/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class S3QueueUnorderedFileMetadata : public S3QueueIFileMetadata +{ +public: + using Bucket = size_t; + + explicit S3QueueUnorderedFileMetadata( + const std::filesystem::path & zk_path, + const std::string & path_, + FileStatusPtr file_status_, + size_t max_loading_retries_, + LoggerPtr log_); + + static std::vector getMetadataPaths() { return {"processed", "failed", "processing"}; } + + void setProcessedAtStartRequests( + Coordination::Requests & requests, + const zkutil::ZooKeeperPtr & zk_client) override; + +private: + std::pair setProcessingImpl() override; + void setProcessedImpl() override; +}; + +} diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index b6daadf8bc4..0844d0a479e 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -1,6 +1,6 @@ +#include #include "config.h" -#if USE_AWS_S3 #include #include #include @@ -17,13 +17,15 @@ #include #include #include -#include +#include #include #include #include #include #include #include +#include +#include #include #include @@ -45,17 +47,10 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int S3_ERROR; extern const int QUERY_NOT_ALLOWED; - extern const int REPLICA_ALREADY_EXISTS; - extern const int INCOMPATIBLE_COLUMNS; } namespace { - bool containsGlobs(const S3::URI & url) - { - return url.key.find_first_of("*?{") != std::string::npos; - } - std::string chooseZooKeeperPath(const 
StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings) { std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value; @@ -99,34 +94,33 @@ namespace StorageS3Queue::StorageS3Queue( std::unique_ptr s3queue_settings_, - const StorageS3::Configuration & configuration_, + const ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & comment, ContextPtr context_, std::optional format_settings_, - ASTStorage * engine_args, + ASTStorage * /* engine_args */, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) , s3queue_settings(std::move(s3queue_settings_)) , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *s3queue_settings)) - , after_processing(s3queue_settings->after_processing) , configuration{configuration_} , format_settings(format_settings_) , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) - , log(getLogger("StorageS3Queue (" + table_id_.table_name + ")")) + , log(getLogger("StorageS3Queue (" + table_id_.getFullTableName() + ")")) { - if (configuration.url.key.empty()) + if (configuration->getPath().empty()) { - configuration.url.key = "/*"; + configuration->setPath("/*"); } - else if (configuration.url.key.ends_with('/')) + else if (configuration->getPath().ends_with('/')) { - configuration.url.key += '*'; + configuration->setPath(configuration->getPath() + '*'); } - else if (!containsGlobs(configuration.url)) + else if (!configuration->isPathWithGlobs()) { throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); } @@ -134,71 +128,43 @@ StorageS3Queue::StorageS3Queue( if (mode == LoadingStrictnessLevel::CREATE && !context_->getSettingsRef().s3queue_allow_experimental_sharded_mode && s3queue_settings->mode == S3QueueMode::ORDERED - && (s3queue_settings->s3queue_total_shards_num > 1 || s3queue_settings->s3queue_processing_threads_num > 1)) + && (s3queue_settings->s3queue_buckets > 1 || s3queue_settings->s3queue_processing_threads_num > 1)) { throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue sharded mode is not allowed. 
To enable use `s3queue_allow_experimental_sharded_mode`"); } checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - configuration.update(context_); - FormatFactory::instance().checkFormatName(configuration.format); - context_->getRemoteHostFilter().checkURL(configuration.url.uri); + object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); + FormatFactory::instance().checkFormatName(configuration->format); + configuration->check(context_); + + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context_); + configuration->check(context_); StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_); - else - columns = StorageS3::getTableStructureFromData(configuration, format_settings, context_); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_).second; - storage_metadata.setColumns(columns_); - } - + storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); - try - { - createOrCheckMetadata(storage_metadata); - } - catch (...) - { - throw; - } - /// Get metadata manager from S3QueueMetadataFactory, /// it will increase the ref count for the metadata object. /// The ref count is decreased when StorageS3Queue::drop() method is called. files_metadata = S3QueueMetadataFactory::instance().getOrCreate(zk_path, *s3queue_settings); - - if (files_metadata->isShardedProcessing()) + try { - if (!s3queue_settings->s3queue_current_shard_num.changed) - { - s3queue_settings->s3queue_current_shard_num = static_cast(files_metadata->registerNewShard()); - engine_args->settings->changes.setSetting("s3queue_current_shard_num", s3queue_settings->s3queue_current_shard_num.value); - } - else if (!files_metadata->isShardRegistered(s3queue_settings->s3queue_current_shard_num)) - { - files_metadata->registerNewShard(s3queue_settings->s3queue_current_shard_num); - } + files_metadata->initialize(configuration_, storage_metadata); } - if (s3queue_settings->mode == S3QueueMode::ORDERED && !s3queue_settings->s3queue_last_processed_path.value.empty()) + catch (...) 
{ - files_metadata->setFileProcessed(s3queue_settings->s3queue_last_processed_path.value, s3queue_settings->s3queue_current_shard_num); + S3QueueMetadataFactory::instance().remove(zk_path); + throw; } } @@ -221,14 +187,7 @@ void StorageS3Queue::shutdown(bool is_drop) if (files_metadata) { - files_metadata->deactivateCleanupTask(); - - if (is_drop && files_metadata->isShardedProcessing()) - { - files_metadata->unregisterShard(s3queue_settings->s3queue_current_shard_num); - LOG_TRACE(log, "Unregistered shard {} from zookeeper", s3queue_settings->s3queue_current_shard_num); - } - + files_metadata->shutdown(); files_metadata.reset(); } LOG_TRACE(log, "Shut down storage"); @@ -241,7 +200,7 @@ void StorageS3Queue::drop() bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context_, format_settings); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context_, format_settings); } class ReadFromS3Queue : public SourceStepWithFilter @@ -293,7 +252,8 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -347,9 +307,9 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const createIterator(nullptr); for (size_t i = 0; i < adjusted_num_streams; ++i) pipes.emplace_back(storage->createSource( + i, info, iterator, - storage->files_metadata->getIdForProcessingThread(i, storage->s3queue_settings->s3queue_current_shard_num), max_block_size, context)); auto pipe = Pipe::unitePipes(std::move(pipes)); @@ -363,50 +323,44 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const } std::shared_ptr StorageS3Queue::createSource( + size_t processor_id, const ReadFromFormatInfo & info, std::shared_ptr file_iterator, - size_t processing_id, size_t max_block_size, ContextPtr local_context) { - auto configuration_snapshot = updateConfigurationAndGetCopy(local_context); - - auto internal_source = std::make_unique( - info, configuration.format, getName(), local_context, format_settings, + auto internal_source = std::make_unique( + getName(), + object_storage, + configuration, + info, + format_settings, + local_context, max_block_size, - configuration_snapshot.request_settings, - configuration_snapshot.compression_method, - configuration_snapshot.client, - configuration_snapshot.url.bucket, - configuration_snapshot.url.version_id, - configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()), - file_iterator, local_context->getSettingsRef().max_download_threads, false); + file_iterator, + local_context->getSettingsRef().max_download_threads, + false); - auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable + auto file_deleter = [=, this](const std::string & path) mutable { - S3::DeleteObjectRequest request; - request.WithKey(path).WithBucket(bucket); - auto outcome = client->DeleteObject(request); - if (blob_storage_log) - blob_storage_log->addEvent( - 
BlobStorageLogElement::EventType::Delete, - bucket, path, {}, 0, outcome.IsSuccess() ? nullptr : &outcome.GetError()); - - if (!outcome.IsSuccess()) - { - const auto & err = outcome.GetError(); - LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); - } - else - { - LOG_TRACE(log, "Object with path {} was removed from S3", path); - } + object_storage->removeObject(StoredObject(path)); }; auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr; return std::make_shared( - getName(), info.source_header, std::move(internal_source), - files_metadata, processing_id, after_processing, file_deleter, info.requested_virtual_columns, - local_context, shutdown_called, table_is_being_dropped, s3_queue_log, getStorageID(), log); + getName(), + processor_id, + info.source_header, + std::move(internal_source), + files_metadata, + s3queue_settings->after_processing, + file_deleter, + info.requested_virtual_columns, + local_context, + shutdown_called, + table_is_being_dropped, + s3_queue_log, + getStorageID(), + log); } bool StorageS3Queue::hasDependencies(const StorageID & table_id) @@ -469,7 +423,7 @@ void StorageS3Queue::threadFunc() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + LOG_ERROR(log, "Failed to process data: {}", getCurrentExceptionMessage(true)); } if (!shutdown_called) @@ -494,7 +448,6 @@ bool StorageS3Queue::streamToViews() auto s3queue_context = Context::createCopy(getContext()); s3queue_context->makeQueryContext(); - auto query_configuration = updateConfigurationAndGetCopy(s3queue_context); // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns @@ -508,10 +461,7 @@ bool StorageS3Queue::streamToViews() pipes.reserve(s3queue_settings->s3queue_processing_threads_num); for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i) { - auto source = createSource( - read_from_format_info, file_iterator, files_metadata->getIdForProcessingThread(i, s3queue_settings->s3queue_current_shard_num), - DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); - + auto source = createSource(i, read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); pipes.emplace_back(std::move(source)); } auto pipe = Pipe::unitePipes(std::move(pipes)); @@ -529,105 +479,33 @@ bool StorageS3Queue::streamToViews() return rows > 0; } -StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context) -{ - configuration.update(local_context); - return configuration; -} - zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const { return getContext()->getZooKeeper(); } -void StorageS3Queue::createOrCheckMetadata(const StorageInMemoryMetadata & storage_metadata) -{ - auto zookeeper = getZooKeeper(); - zookeeper->createAncestors(zk_path); - - for (size_t i = 0; i < 1000; ++i) - { - Coordination::Requests requests; - if (zookeeper->exists(zk_path / "metadata")) - { - checkTableStructure(zk_path, storage_metadata); - } - else - { - std::string metadata = S3QueueTableMetadata(configuration, *s3queue_settings, storage_metadata).toString(); - requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); - requests.emplace_back(zkutil::makeCreateRequest(zk_path / "processed", "", zkutil::CreateMode::Persistent)); - requests.emplace_back(zkutil::makeCreateRequest(zk_path / "failed", "", zkutil::CreateMode::Persistent)); - 
requests.emplace_back(zkutil::makeCreateRequest(zk_path / "processing", "", zkutil::CreateMode::Persistent)); - requests.emplace_back(zkutil::makeCreateRequest(zk_path / "metadata", metadata, zkutil::CreateMode::Persistent)); - } - - Coordination::Responses responses; - auto code = zookeeper->tryMulti(requests, responses); - if (code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "It looks like the table {} was created by another server at the same moment, will retry", zk_path.string()); - continue; - } - else if (code != Coordination::Error::ZOK) - { - zkutil::KeeperMultiException::check(code, requests, responses); - } - return; - } - - throw Exception( - ErrorCodes::REPLICA_ALREADY_EXISTS, - "Cannot create table, because it is created concurrently every time or because " - "of wrong zk_path or because of logical error"); -} - - -void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const StorageInMemoryMetadata & storage_metadata) -{ - // Verify that list of columns and table settings match those specified in ZK (/metadata). - // If not, throw an exception. - - auto zookeeper = getZooKeeper(); - String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata"); - auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str); - - S3QueueTableMetadata old_metadata(configuration, *s3queue_settings, storage_metadata); - old_metadata.checkEquals(metadata_from_zk); - - auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); - const ColumnsDescription & old_columns = storage_metadata.getColumns(); - if (columns_from_zk != old_columns) - { - throw Exception( - ErrorCodes::INCOMPATIBLE_COLUMNS, - "Table columns structure in ZooKeeper is different from local table structure. Local columns:\n" - "{}\nZookeeper columns:\n{}", - old_columns.toString(), - columns_from_zk.toString()); - } -} - std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { - auto glob_iterator = std::make_unique( - *configuration.client, configuration.url, predicate, getVirtualsList(), local_context, - /* read_keys */nullptr, configuration.request_settings); + auto settings = configuration->getQuerySettings(local_context); + auto glob_iterator = std::make_unique( + object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match); - return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); + return std::make_shared(files_metadata, std::move(glob_iterator), shutdown_called, log); } -void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) +#if USE_AWS_S3 +void registerStorageS3Queue(StorageFactory & factory) { factory.registerStorage( - name, + "S3Queue", [](const StorageFactory::Arguments & args) { auto & engine_args = args.engine_args; if (engine_args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. 
Settings from current @@ -682,13 +560,6 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) .source_access_type = AccessType::S3, }); } - -void registerStorageS3Queue(StorageFactory & factory) -{ - return registerStorageS3QueueImpl("S3Queue", factory); -} - -} - - #endif + +} diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 1f735b47819..ef83a1ccc25 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -1,14 +1,13 @@ #pragma once #include "config.h" -#if USE_AWS_S3 #include #include #include #include #include #include -#include +#include #include #include #include @@ -16,16 +15,16 @@ namespace DB { -class S3QueueFilesMetadata; +class S3QueueMetadata; class StorageS3Queue : public IStorage, WithContext { public: - using Configuration = typename StorageS3::Configuration; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StorageS3Queue( std::unique_ptr s3queue_settings_, - const Configuration & configuration_, + ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -47,7 +46,7 @@ public: size_t max_block_size, size_t num_streams) override; - const auto & getFormatName() const { return configuration.format; } + const auto & getFormatName() const { return configuration->format; } const fs::path & getZooKeeperPath() const { return zk_path; } @@ -59,10 +58,10 @@ private: const std::unique_ptr s3queue_settings; const fs::path zk_path; - const S3QueueAction after_processing; - std::shared_ptr files_metadata; - Configuration configuration; + std::shared_ptr files_metadata; + ConfigurationPtr configuration; + ObjectStoragePtr object_storage; const std::optional format_settings; @@ -81,24 +80,19 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); std::shared_ptr createSource( + size_t processor_id, const ReadFromFormatInfo & info, std::shared_ptr file_iterator, - size_t processing_id, size_t max_block_size, ContextPtr local_context); bool hasDependencies(const StorageID & table_id); bool streamToViews(); void threadFunc(); - - void createOrCheckMetadata(const StorageInMemoryMetadata & storage_metadata); - void checkTableStructure(const String & zookeeper_prefix, const StorageInMemoryMetadata & storage_metadata); - Configuration updateConfigurationAndGetCopy(ContextPtr local_context); }; } - -#endif diff --git a/src/Storages/SelectQueryInfo.cpp b/src/Storages/SelectQueryInfo.cpp index 665da7fee70..d59ccf0dfaf 100644 --- a/src/Storages/SelectQueryInfo.cpp +++ b/src/Storages/SelectQueryInfo.cpp @@ -13,4 +13,24 @@ bool SelectQueryInfo::isFinal() const return select.final(); } +std::unordered_map SelectQueryInfo::buildNodeNameToInputNodeColumn() const +{ + std::unordered_map node_name_to_input_node_column; + if (planner_context) + { + const auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) + { + /// ALIAS columns cannot be used in the filter expression without being 
calculated in ActionsDAG, + /// so they should not be added to the input nodes. + if (alias_column_expressions.contains(column_name)) + continue; + const auto & column = table_expression_data.getColumnOrThrow(column_name); + node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); + } + } + return node_name_to_input_node_column; +} + } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 655676812d9..11e2a2fc5e7 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -239,5 +239,11 @@ struct SelectQueryInfo bool merge_tree_enable_remove_parts_from_snapshot_optimization = true; bool isFinal() const; + + /// Analyzer generates unique ColumnIdentifiers like __table1.__partition_id in filter nodes, + /// while key analysis still requires unqualified column names. + /// This function generates a map that maps the unique names to table column names, + /// for the current table (`table_expression`). + std::unordered_map buildNodeNameToInputNodeColumn() const; }; } diff --git a/src/Storages/Statistics/Estimator.cpp b/src/Storages/Statistics/Estimator.cpp index 7e0e465c7bf..e272014c1c2 100644 --- a/src/Storages/Statistics/Estimator.cpp +++ b/src/Storages/Statistics/Estimator.cpp @@ -112,7 +112,7 @@ Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) auto [op, val] = extractBinaryOp(node, col); if (op == "equals") { - if (val < - threshold || val > threshold) + if (val < -threshold || val > threshold) return default_normal_cond_factor; else return default_good_cond_factor; diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index a427fb6a7cd..7d4226f2fbe 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -22,6 +22,31 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; }; +StatisticDescription & StatisticDescription::operator=(const StatisticDescription & other) +{ + if (this == &other) + return *this; + + type = other.type; + column_name = other.column_name; + ast = other.ast ? other.ast->clone() : nullptr; + + return *this; +} + +StatisticDescription & StatisticDescription::operator=(StatisticDescription && other) noexcept +{ + if (this == &other) + return *this; + + type = std::exchange(other.type, StatisticType{}); + column_name = std::move(other.column_name); + ast = other.ast ? 
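The new SelectQueryInfo::buildNodeNameToInputNodeColumn above walks the planner's table expression data and maps each analyzer-generated column identifier (e.g. `__table1.__partition_id`) back to the plain column name, skipping ALIAS columns because those would first have to be computed by an ActionsDAG. A minimal, self-contained sketch of that mapping step, using std containers and hypothetical stand-ins (`TableExpressionData`, `ColumnInfo`) instead of the planner types:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>

// Hypothetical stand-ins for the planner-side structures.
struct ColumnInfo { std::string type; };

struct TableExpressionData
{
    // analyzer identifier -> unqualified column name, e.g. "__table1.__partition_id" -> "_partition_id"
    std::unordered_map<std::string, std::string> identifier_to_column_name;
    // columns declared as ALIAS, which need an ActionsDAG to be materialized
    std::unordered_set<std::string> alias_columns;
    std::unordered_map<std::string, ColumnInfo> columns;
};

// Translate filter-node identifiers into input columns, skipping ALIAS columns,
// mirroring the idea of the added method.
std::unordered_map<std::string, std::pair<std::string, ColumnInfo>>
buildNodeNameToInputNodeColumn(const TableExpressionData & data)
{
    std::unordered_map<std::string, std::pair<std::string, ColumnInfo>> result;
    for (const auto & [identifier, column_name] : data.identifier_to_column_name)
    {
        if (data.alias_columns.contains(column_name))
            continue; // ALIAS columns are not valid input nodes
        result.emplace(identifier, std::make_pair(column_name, data.columns.at(column_name)));
    }
    return result;
}

int main()
{
    TableExpressionData data;
    data.identifier_to_column_name = {{"__table1.__partition_id", "_partition_id"}, {"__table1.total", "total"}};
    data.alias_columns = {"total"}; // "total" is an ALIAS column in this example
    data.columns["_partition_id"] = {"String"};
    data.columns["total"] = {"UInt64"};

    for (const auto & [identifier, column] : buildNodeNameToInputNodeColumn(data))
        std::cout << identifier << " -> " << column.first << " (" << column.second.type << ")\n";
}
```

Key analysis can then work on the unqualified names while the filter DAG keeps its unique identifiers.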
other.ast->clone() : nullptr; + other.ast.reset(); + + return *this; +} + StatisticType stringToType(String type) { if (type == "tdigest") @@ -55,15 +80,7 @@ std::vector StatisticDescription::getStatisticsFromAST(con const auto & column = columns.getPhysical(column_name); stat.column_name = column.name; - - auto function_node = std::make_shared(); - function_node->name = "STATISTIC"; - function_node->arguments = std::make_shared(); - function_node->arguments->children.push_back(std::make_shared(stat_definition->type)); - function_node->children.push_back(function_node->arguments); - - stat.ast = function_node; - + stat.ast = makeASTFunction("STATISTIC", std::make_shared(stat_definition->type)); stats.push_back(stat); } @@ -80,6 +97,7 @@ StatisticDescription StatisticDescription::getStatisticFromColumnDeclaration(con const auto & stat_type_list_ast = column.stat_type->as().arguments; if (stat_type_list_ast->children.size() != 1) throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect only one statistic type for column {}", queryToString(column)); + const auto & stat_type = stat_type_list_ast->children[0]->as().name; StatisticDescription stat; diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 9a66951ab52..b571fa31e9d 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -27,6 +27,10 @@ struct StatisticDescription String getTypeName() const; StatisticDescription() = default; + StatisticDescription(const StatisticDescription & other) { *this = other; } + StatisticDescription & operator=(const StatisticDescription & other); + StatisticDescription(StatisticDescription && other) noexcept { *this = std::move(other); } + StatisticDescription & operator=(StatisticDescription && other) noexcept; bool operator==(const StatisticDescription & other) const { diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp deleted file mode 100644 index f2e2833dad4..00000000000 --- a/src/Storages/StorageAzureBlob.cpp +++ /dev/null @@ -1,1631 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -using namespace Azure::Storage::Blobs; - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern const Metric ObjectStorageAzureThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; -} - -namespace -{ - -const std::unordered_set required_configuration_keys = { - "blob_path", - "container", -}; - -const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "structure", - "compression_method", - "account_name", - "account_key", - "connection_string", - "storage_account_url", -}; - -bool 
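StatisticDescription gains explicit copy/move constructors and assignment operators because the defaulted ones would copy the `ast` pointer and leave two descriptions sharing the same mutable AST node; the added operators clone the AST instead. A minimal sketch of the pattern, with a hypothetical `Node` type standing in for the AST class:

```cpp
#include <memory>
#include <string>
#include <utility>

struct Node
{
    std::string text;
    std::shared_ptr<Node> clone() const { return std::make_shared<Node>(*this); }
};

struct Description
{
    std::string column_name;
    std::shared_ptr<Node> ast;

    Description() = default;

    Description(const Description & other) { *this = other; }
    Description & operator=(const Description & other)
    {
        if (this == &other)
            return *this;
        column_name = other.column_name;
        ast = other.ast ? other.ast->clone() : nullptr; // deep copy: never share the node
        return *this;
    }

    Description(Description && other) noexcept { *this = std::move(other); }
    Description & operator=(Description && other) noexcept
    {
        if (this == &other)
            return *this;
        column_name = std::move(other.column_name);
        ast = std::exchange(other.ast, nullptr); // steal the pointer; no clone needed on move
        return *this;
    }
};
```

Note that the move assignment in the diff clones the source AST and then resets it; simply stealing the pointer, as sketched, is an alternative that avoids the extra clone.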
isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); - configuration.blob_path = collection.get("blob_path"); - - if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); - - if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); - - configuration.structure = collection.getOrDefault("structure", "auto"); - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); -} - - -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) -{ - StorageAzureBlob::Configuration configuration; - - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; - } - - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage AzureBlobStorage requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool - { - return s == "auto" || FormatFactory::instance().exists(s); - }; - - if (engine_args.size() == 4) - { - //'c1 UInt64, c2 UInt64 - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); 
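The removed StorageAzureBlob::getConfiguration dispatches on the number of positional engine arguments and tells a format name apart from an account name with an `is_format_arg` check; the 5-, 6- and 7-argument cases continue just below. A simplified, self-contained sketch of that dispatch (only the 4- and 5-argument forms, with a small illustrative `known_formats` set instead of FormatFactory):

```cpp
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

struct AzureConfig
{
    std::string connection_url, container, blob_path;
    std::string format = "auto", compression = "auto";
    std::string account_name, account_key;
};

AzureConfig parseEngineArgs(const std::vector<std::string> & args)
{
    static const std::set<std::string> known_formats = {"CSV", "Parquet", "JSONEachRow"}; // illustrative subset
    auto is_format_arg = [](const std::string & s) { return s == "auto" || known_formats.contains(s); };

    if (args.size() < 3 || args.size() > 7)
        throw std::invalid_argument("AzureBlobStorage requires 3 to 7 arguments");

    AzureConfig cfg;
    cfg.connection_url = args[0];
    cfg.container = args[1];
    cfg.blob_path = args[2];

    if (args.size() == 4)
    {
        if (!is_format_arg(args[3]))
            throw std::invalid_argument("Unknown format or account name specified without account key");
        cfg.format = args[3];
    }
    else if (args.size() == 5)
    {
        if (is_format_arg(args[3]))      // (format, compression)
        {
            cfg.format = args[3];
            cfg.compression = args[4];
        }
        else                             // (account_name, account_key)
        {
            cfg.account_name = args[3];
            cfg.account_key = args[4];
        }
    }
    // The 6- and 7-argument forms additionally carry format [, compression] after the credentials.
    return cfg;
}
```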
- if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; -} - - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - -void registerStorageAzureBlob(StorageFactory & factory) -{ - factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. 
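Both the S3Queue registration earlier in this diff and the removed AzureBlobStorage registration (continued just below) build format settings in two stages: changed settings from the global context are applied first, silently skipping anything that is not a format setting, and the SETTINGS clause of CREATE is then applied with validation. A generic sketch of that two-stage merge, with a hypothetical `FormatSettingsBag` standing in for FormatFactorySettings:

```cpp
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

struct SettingChange { std::string name; std::string value; };

struct FormatSettingsBag
{
    std::map<std::string, std::string> values = {{"format_csv_delimiter", ","}, {"output_format_json_quote_64bit_integers", "1"}};

    bool has(const std::string & name) const { return values.contains(name); }
    void set(const std::string & name, const std::string & value) { values.at(name) = value; }

    // Stage 2: changes from the SETTINGS clause must all be known format settings.
    void applyChanges(const std::vector<SettingChange> & changes)
    {
        for (const auto & c : changes)
        {
            if (!has(c.name))
                throw std::invalid_argument("Unknown format setting: " + c.name);
            set(c.name, c.value);
        }
    }
};

FormatSettingsBag buildFormatSettings(const std::vector<SettingChange> & global_changes,
                                      const std::vector<SettingChange> & create_query_settings)
{
    FormatSettingsBag bag;
    // Stage 1: take over changed global settings, ignoring unknown (non-format) ones.
    for (const auto & c : global_changes)
        if (bag.has(c.name))
            bag.set(c.name, c.value);
    // Stage 2: SETTINGS clause, validated.
    bag.applyChanges(create_query_settings);
    return bag;
}
```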
- const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - auto settings = StorageAzureBlob::createSettings(args.getContext()); - - return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing */ false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::AZURE, - }); -} - -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto 
workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - - -StorageAzureBlob::StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - const ContextPtr & context, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , name("AzureBlobStorage") - , configuration(configuration_) - , object_storage(std::move(object_storage_)) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - if (configuration.format != "auto") - 
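The removed createClient builds the per-container endpoint by splicing the container name in before the query string when the account URL carries a SAS token (`?...`), and by simply appending it otherwise. A standalone sketch of that URL construction (the example URLs are illustrative only):

```cpp
#include <iostream>
#include <string>

// Build "<account-url>/<container>[?sas]" the way the removed createClient did:
// if the URL carries a SAS token, the container is spliced in before the '?'.
std::string containerEndpoint(const std::string & connection_url, const std::string & container)
{
    const size_t pos = connection_url.find('?');
    if (pos == std::string::npos)
        return connection_url + (connection_url.back() == '/' ? "" : "/") + container;

    std::string url_without_sas = connection_url.substr(0, pos);
    return url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container
        + connection_url.substr(pos); // re-attach "?sv=...&sig=..."
}

int main()
{
    std::cout << containerEndpoint("https://account.blob.core.windows.net", "data") << '\n';
    std::cout << containerEndpoint("https://account.blob.core.windows.net/?sv=2022&sig=abc", "data") << '\n';
}
```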
FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context); - else - columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context).second; - - /// We don't allow special columns in File storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); -} - -void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) -{ - if (configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", - configuration.blob_path); - } - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - object_storage->removeObjectsIfExist(objects); -} - -namespace -{ - -class StorageAzureBlobSink : public SinkToStorage -{ -public: - StorageAzureBlobSink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - AzureObjectStorage * object_storage, - const String & blob_path) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - StoredObject object(blob_path); - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - object_storage->writeObject(object, WriteMode::Rewrite), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageAzureBlobSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) 
- { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, AzureObjectStorage * object_storage, const String & path, size_t sequence_number) - { - if (context->getSettingsRef().azure_truncate_on_insert || !object_storage->exists(StoredObject(path))) - return std::nullopt; - - if (context->getSettingsRef().azure_create_new_file_on_insert) - { - auto pos = path.find_first_of('.'); - String new_path; - do - { - new_path = path.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : path.substr(pos)); - ++sequence_number; - } - while (object_storage->exists(StoredObject(new_path))); - - return new_path; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object with key {} already exists. " - "If you want to overwrite it, enable setting azure_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting azure_create_new_file_on_insert", - path); - } -} - -class PartitionedStorageAzureBlobSink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageAzureBlobSink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - AzureObjectStorage * object_storage_, - const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , object_storage(object_storage_) - , blob(blob_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_key = replaceWildcards(blob, partition_id); - validateKey(partition_key); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(getContext(), object_storage, partition_key, 1)) - partition_key = *new_path; - - return std::make_shared( - format, - sample_block, - getContext(), - format_settings, - compression_method, - object_storage, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - AzureObjectStorage * object_storage; - const String blob; - const std::optional format_settings; - - ExpressionActionsPtr partition_by_expr; - - static void validateKey(const String & str) - { - validatePartitionKey(str, true); - } -}; - -} - -class ReadFromAzureBlob : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromAzureBlob"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromAzureBlob( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & 
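The removed checkAndGetNewFileOnInsertIfNeeded implements the `azure_create_new_file_on_insert` behaviour: when the target blob already exists and truncation is not requested, it derives `name.N.ext` with the first free sequence number. A standalone sketch of that naming rule, where the `object_exists` callback is a stand-in for the object-storage existence check and the starting sequence number is chosen arbitrarily:

```cpp
#include <functional>
#include <iostream>
#include <set>
#include <string>

// Derive "data.1.csv", "data.2.csv", ... until a non-existing key is found.
std::string nextFreePath(const std::string & path, const std::function<bool(const std::string &)> & object_exists)
{
    const auto pos = path.find_first_of('.');
    size_t sequence_number = 1;
    std::string candidate;
    do
    {
        candidate = path.substr(0, pos) + "." + std::to_string(sequence_number)
            + (pos == std::string::npos ? "" : path.substr(pos));
        ++sequence_number;
    } while (object_exists(candidate));
    return candidate;
}

int main()
{
    std::set<std::string> existing = {"data.csv", "data.1.csv"};
    auto exists = [&](const std::string & key) { return existing.contains(key); };
    std::cout << nextFreePath("data.csv", exists) << '\n'; // prints "data.2.csv"
}
```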
storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::shared_ptr storage_, - ReadFromFormatInfo info_, - const bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , storage(std::move(storage_)) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - std::shared_ptr storage; - ReadFromFormatInfo info; - const bool need_only_count; - - size_t max_block_size; - const size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageAzureBlob::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - if (partition_by && configuration.withWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet"); - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - std::move(this_ptr), - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - const auto & configuration = storage->configuration; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared(context, - context->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } - else - { - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - const auto & configuration = storage->configuration; - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - configuration.format, - getName(), - context, - storage->format_settings, - max_block_size, - configuration.compression_method, - 
storage->object_storage.get(), - configuration.container, - configuration.connection_url, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto path = configuration.blobs_paths.front(); - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(path, configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration.withWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } - else - { - if (configuration.withGlobs()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); - - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(local_context, object_storage.get(), path, configuration.blobs_paths.size())) - { - configuration.blobs_paths.push_back(*new_path); - path = *new_path; - } - - return std::make_shared( - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } -} - -bool StorageAzureBlob::supportsPartitionBy() const -{ - return true; -} - -bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageAzureBlob::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -StorageAzureBlobSource::GlobIterator::GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , blob_path_with_globs(blob_path_with_globs_) - , virtual_columns(virtual_columns_) - , outer_blobs(outer_blobs_) - , file_progress_callback(file_progress_callback_) -{ - - const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == blob_path_with_globs.size()) - { - auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back( - blob_path_with_globs, - object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - is_finished = true; - return; - } - - object_storage_iterator = object_storage->iterate(key_prefix); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(blob_path_with_globs)); - - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error()); - - recursive = blob_path_with_globs == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); -} - -RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() -{ - std::lock_guard lock(next_mutex); - - if (is_finished && index >= blobs_with_metadata.size()) - { - return {}; - } - - bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); - - if (need_new_batch) - { - RelativePathsWithMetadata new_batch; - while (new_batch.empty()) - { - auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); - if (result.has_value()) - { - new_batch = result.value(); - } - else - { - is_finished = true; - return {}; - } - - for (auto it = new_batch.begin(); it != new_batch.end();) - { - if (!recursive && !re2::RE2::FullMatch(it->relative_path, *matcher)) - it = new_batch.erase(it); - else - ++it; - } - } - - index = 0; - - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (auto & path_with_metadata : new_batch) - paths.push_back(fs::path(container) / path_with_metadata.relative_path); - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - } - - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - - blobs_with_metadata = std::move(new_batch); - if (file_progress_callback) - { - for (const auto & [relative_path, info] : blobs_with_metadata) - { - file_progress_callback(FileProgress(0, info.size_bytes)); - } - } - } - - size_t current_index = index++; - if (current_index >= blobs_with_metadata.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return blobs_with_metadata[current_index]; -} - -StorageAzureBlobSource::KeysIterator::KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , virtual_columns(virtual_columns_) -{ - Strings all_keys = keys_; - - ASTPtr filter_ast; - if (!all_keys.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - Strings paths; - paths.reserve(all_keys.size()); - for (const auto & key : all_keys) - paths.push_back(fs::path(container) / key); - - VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext()); - } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = 
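The removed GlobIterator lists blobs under the literal key prefix (everything before the first `*`, `?` or `{`) and then filters each listed batch with a regular expression generated from the glob, dropping non-matching keys before virtual-column filtering. A simplified stand-in for that flow: a toy glob-to-regex conversion instead of makeRegexpPatternFromGlobs, and std::regex instead of re2:

```cpp
#include <iostream>
#include <regex>
#include <string>
#include <vector>

// Toy glob -> regex: '*' matches within one path segment, '?' matches one character.
// (The real makeRegexpPatternFromGlobs also handles {a,b} alternatives and ranges.)
std::string globToRegex(const std::string & glob)
{
    std::string re;
    for (char c : glob)
    {
        if (c == '*') re += "[^/]*";
        else if (c == '?') re += "[^/]";
        else if (std::string(".+()[]\\^$|").find(c) != std::string::npos) { re += '\\'; re += c; }
        else re += c;
    }
    return re;
}

int main()
{
    const std::string glob = "logs/2024-*/part-?.csv";
    const std::string key_prefix = glob.substr(0, glob.find_first_of("*?{")); // "logs/2024-"
    const std::regex matcher(globToRegex(glob));

    // Pretend these came back from listing objects under key_prefix.
    std::vector<std::string> batch = {"logs/2024-01/part-0.csv", "logs/2024-01/subdir/part-0.csv", "logs/2024-02/part-1.csv.tmp"};
    for (const auto & key : batch)
        if (std::regex_match(key, matcher))
            std::cout << key << '\n'; // only logs/2024-01/part-0.csv survives
}
```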
object_storage->getObjectMetadata(key); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(key, object_metadata); - } - - if (outer_blobs) - *outer_blobs = keys; -} - -RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - return keys[current_index]; -} - -Chunk StorageAzureBlobSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - fs::path(container) / reader.getRelativePath(), - reader.getRelativePathWithMetadata().metadata.size_bytes); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(connection_url) / container / path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) -{ - String source = fs::path(connection_url) / container / path_with_metadata.relative_path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = path_with_metadata.metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - return std::nullopt; - }; - - return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -StorageAzureBlobSource::StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , format(format_) - , name(std::move(name_)) - , sample_block(info.format_header) - , format_settings(format_settings_) - , columns_desc(info.columns_description) - , max_block_size(max_block_size_) - , compression_hint(compression_hint_) - , object_storage(std::move(object_storage_)) - , container(container_) - , connection_url(connection_url_) - , 
file_iterator(file_iterator_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "AzureReader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - - -StorageAzureBlobSource::~StorageAzureBlobSource() -{ - create_reader_pool.wait(); -} - -String StorageAzureBlobSource::getName() const -{ - return name; -} - -StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() -{ - auto path_with_metadata = file_iterator->next(); - if (path_with_metadata.relative_path.empty()) - return {}; - - if (path_with_metadata.metadata.size_bytes == 0) - path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint); - read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, max_parsing_threads, std::nullopt, - /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. 
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageAzureBlobSource::createReaderAsync() -{ - return create_reader_scheduler([this] { return createReader(); }, Priority{}); -} - -std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. - if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from Azure with initial prefetch", object_size); - return createAsyncAzureReadBuffer(key, read_settings, object_size); - } - - return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::shared_ptr & file_iterator_, - AzureObjectStorage * object_storage_, - std::optional format_, - const StorageAzureBlob::Configuration & configuration_, - const std::optional & format_settings_, - const RelativePathsWithMetadata & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , object_storage(object_storage_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key : read_keys) - { - if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(key.relative_path)) - { - format = format_from_path; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - current_path_with_metadata = file_iterator->next(); - - if (current_path_with_metadata.relative_path.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in AzureBlobStorage. 
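The removed createAzureReadBuffer switches to a prefetching asynchronous buffer only when the object is small (at most twice the download buffer size) and the threadpool read method is enabled; larger objects keep the plain read path, where parallel reading pays off instead. A minimal sketch of that decision with hypothetical buffer types:

```cpp
#include <cstddef>
#include <memory>

enum class RemoteReadMethod { read, threadpool };

struct ReadBuffer { virtual ~ReadBuffer() = default; };
struct PlainRemoteReadBuffer : ReadBuffer { };
struct PrefetchingAsyncReadBuffer : ReadBuffer { };

// Mirror of the removed heuristic: prefetch the whole object up front only when it is tiny,
// because prefetching helps a lot for many small files but wastes work on big ones.
std::unique_ptr<ReadBuffer> chooseReadBuffer(size_t object_size, size_t max_download_buffer_size, RemoteReadMethod method)
{
    const bool object_too_small = object_size <= 2 * max_download_buffer_size;
    if (object_too_small && method == RemoteReadMethod::threadpool)
        return std::make_unique<PrefetchingAsyncReadBuffer>();
    return std::make_unique<PlainRemoteReadBuffer>();
}
```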
You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually"); - } - - return {nullptr, std::nullopt, format}; - } - - first = false; - - /// AzureBlobStorage file iterator could get new keys after new iteration. - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it).relative_path)) - { - format = format_from_file_name; - break; - } - } - } - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - RelativePathsWithMetadata paths = {current_path_with_metadata}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache, format}; - } - - first = false; - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return {wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt, format}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + 
elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override { return current_path_with_metadata.relative_path; } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max); - } - - private: - std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_azure) - return std::nullopt; - - auto & schema_cache = StorageAzureBlob::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] -> std::optional - { - if (it->metadata.last_modified) - return it->metadata.last_modified->epochTime(); - return std::nullopt; - }; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - String source = host_and_bucket + '/' + it->relative_path; - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - AzureObjectStorage * object_storage; - const StorageAzureBlob::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - const RelativePathsWithMetadata & read_keys; - size_t prev_read_keys_size; - RelativePathWithMetadata current_path_with_metadata; - bool first = true; - }; -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromDataImpl( - std::optional format, - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - RelativePathsWithMetadata read_keys; - std::shared_ptr file_iterator; - if (configuration.withGlobs()) - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - else - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx); -} - -ColumnsDescription StorageAzureBlob::getTableStructureFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration.format, object_storage, configuration, format_settings, ctx).first; -} - -SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_azure", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - - -std::unique_ptr StorageAzureBlobSource::createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto modified_settings{read_settings}; - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto async_reader = object_storage->readObjects(StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, modified_settings); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -} - -#endif diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h deleted file mode 100644 index 20e7f4a6c90..00000000000 --- a/src/Storages/StorageAzureBlob.h +++ /dev/null @@ -1,345 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class StorageAzureBlob : public IStorage -{ -public: - - using AzureClient = Azure::Storage::Blobs::BlobContainerClient; - 
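The schema-inference cache lookups in the removed ReadBufferIterator key entries by the fully qualified source (`host/container/key`) plus format and format settings, and only honour an entry if the object has not been modified since the schema was cached. A sketch of one such validation, with a hypothetical `SchemaCache` built on std containers (the real cache also stores row counts and supports many formats per source):

```cpp
#include <ctime>
#include <functional>
#include <map>
#include <optional>
#include <string>

struct CachedSchema
{
    std::string columns;          // serialized column list, e.g. "a UInt64, b String"
    std::time_t last_modified{};  // last-modified time of the object the schema was inferred from
};

class SchemaCache
{
public:
    void add(const std::string & key, CachedSchema schema) { entries[key] = std::move(schema); }

    // Return the cached schema only if the object still has the recorded last-modified time.
    std::optional<std::string> tryGet(const std::string & key,
                                      const std::function<std::optional<std::time_t>()> & get_last_mod_time) const
    {
        auto it = entries.find(key);
        if (it == entries.end())
            return std::nullopt;
        auto current = get_last_mod_time();
        if (!current || *current != it->second.last_modified)
            return std::nullopt; // object was rewritten (or has no timestamp): drop the hit
        return it->second.columns;
    }

private:
    std::map<std::string, CachedSchema> entries;
};
```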
using AzureClientPtr = std::unique_ptr; - - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - String getPath() const { return blob_path; } - - bool update(const ContextPtr & context); - - bool withGlobs() const { return blob_path.find_first_of("*?{") != std::string::npos; } - - bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return blobs_paths.back().find(PARTITION_ID_WILDCARD) != String::npos; - } - - Poco::URI getConnectionURL() const; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; - std::string blob_path; - std::vector blobs_paths; - }; - - StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_); - - static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true); - - static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); - - static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); - - String getName() const override - { - return name; - } - - void read( - QueryPlan & query_plan, - const Names &, - const StorageSnapshotPtr &, - SelectQueryInfo &, - ContextPtr, - QueryProcessingStage::Enum, - size_t, - size_t) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /* metadata_snapshot */, ContextPtr context, bool /*async_insert*/) override; - - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - bool supportsPartitionBy() const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static ColumnsDescription getTableStructureFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - static std::pair getTableStructureAndFormatFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - -private: - static std::pair getTableStructureAndFormatFromDataImpl( - std::optional format, - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - friend class ReadFromAzureBlob; - - std::string name; - Configuration configuration; - std::unique_ptr object_storage; - - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; -}; - -class StorageAzureBlobSource : public ISource, WithContext -{ 
-public: - class IIterator : public WithContext - { - public: - explicit IIterator(const ContextPtr & context_):WithContext(context_) {} - virtual ~IIterator() = default; - virtual RelativePathWithMetadata next() = 0; - - RelativePathWithMetadata operator ()() { return next(); } - }; - - class GlobIterator : public IIterator - { - public: - GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_ = {}); - - RelativePathWithMetadata next() override; - ~GlobIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - String blob_path_with_globs; - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; - - size_t index = 0; - - RelativePathsWithMetadata blobs_with_metadata; - RelativePathsWithMetadata * outer_blobs; - ObjectStorageIteratorPtr object_storage_iterator; - bool recursive{false}; - - std::unique_ptr matcher; - - void createFilterAST(const String & any_key); - bool is_finished = false; - std::mutex next_mutex; - - std::function file_progress_callback; - }; - - class ReadIterator : public IIterator - { - public: - explicit ReadIterator(const ContextPtr & context_, - const ReadTaskCallback & callback_) - : IIterator(context_), callback(callback_) { } - RelativePathWithMetadata next() override - { - return {callback(), {}}; - } - - private: - ReadTaskCallback callback; - }; - - class KeysIterator : public IIterator - { - public: - KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback = {}); - - RelativePathWithMetadata next() override; - ~KeysIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - RelativePathsWithMetadata keys; - - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; - - std::atomic index = 0; - }; - - StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_); - ~StorageAzureBlobSource() override; - - Chunk generate() override; - - String getName() const override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const RelativePathWithMetadata & path_with_metadata); - - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - String format; - String name; - Block sample_block; - std::optional format_settings; - ColumnsDescription columns_desc; - UInt64 max_block_size; - String compression_hint; - AzureObjectStorage * object_storage; - String container; - String connection_url; - std::shared_ptr file_iterator; - bool need_only_count; - size_t total_rows_in_file = 0; - - struct ReaderHolder - { - public: - ReaderHolder( - RelativePathWithMetadata relative_path_with_metadata_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - 
std::unique_ptr pipeline_, - std::unique_ptr reader_) - : relative_path_with_metadata(std::move(relative_path_with_metadata_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. - /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - relative_path_with_metadata = std::move(other.relative_path_with_metadata); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getRelativePath() const { return relative_path_with_metadata.relative_path; } - const RelativePathWithMetadata & getRelativePathWithMetadata() const { return relative_path_with_metadata; } - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - RelativePathWithMetadata relative_path_with_metadata; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - LoggerPtr log = getLogger("StorageAzureBlobSource"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; - std::future reader_future; - - /// Recreate ReadBuffer and Pipeline for each file. 
- ReaderHolder createReader(); - std::future createReaderAsync(); - - std::unique_ptr createAzureReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size); -}; - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp deleted file mode 100644 index a80d121567a..00000000000 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "Storages/StorageAzureBlobCluster.h" - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageAzureBlobCluster::StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")")) - , configuration{configuration_} - , object_storage(std::move(object_storage_)) -{ - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function - if (configuration.format == "auto") - std::tie(columns, configuration.format) = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); - else - columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context).second; - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionAzureBlobStorageCluster::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), configuration.format, context); -} - -RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - object_storage.get(), configuration.container, configuration.blob_path, - predicate, getVirtualsList(), context, nullptr); - - auto callback = 
std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h deleted file mode 100644 index eff4d70f1bd..00000000000 --- a/src/Storages/StorageAzureBlobCluster.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -#include "Client/Connection.h" -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageAzureBlobCluster : public IStorageCluster -{ -public: - StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context); - - std::string getName() const override { return "AzureBlobStorageCluster"; } - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -private: - void updateBeforeRead(const ContextPtr & /*context*/) override {} - - void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; - - StorageAzureBlob::Configuration configuration; - std::unique_ptr object_storage; -}; - - -} - -#endif diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 65af5145bec..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -281,7 +281,7 @@ void StorageBuffer::read( if (!dest_columns.hasPhysical(column_name)) { LOG_WARNING(log, "Destination table {} doesn't have column {}. The default values are used.", destination_id.getNameForLogs(), backQuoteIfNeed(column_name)); - boost::range::remove_erase(columns_intersection, column_name); + std::erase(columns_intersection, column_name); continue; } const auto & dst_col = dest_columns.getPhysical(column_name); @@ -302,6 +302,8 @@ void StorageBuffer::read( auto src_table_query_info = query_info; if (src_table_query_info.prewhere_info) { + src_table_query_info.prewhere_info = src_table_query_info.prewhere_info->clone(); + auto actions_dag = ActionsDAG::makeConvertingActions( header_after_adding_defaults.getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 6c15c7e0238..cd6dd7b933f 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -89,6 +89,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool /*async_insert*/) override; void startup() override; diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index a414e4586d9..17e4efda2cd 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -20,7 +20,7 @@ friend class TableFunctionDictionary; public: /// Specifies where the table is located relative to the dictionary. 
- enum class Location + enum class Location : uint8_t { /// Table was created automatically as an element of a database with the Dictionary engine. DictionaryDatabase, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 69d3cf3ad3b..9c58468c4a4 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -193,8 +193,7 @@ UInt64 getMaximumFileNumber(const std::string & dir_path) throw; } - if (num > res) - res = num; + res = std::max(num, res); } return res; @@ -701,7 +700,7 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr auto name_in_storage = Nested::splitName(required_column).first; auto column_in_storage = all_columns.tryGetPhysical(name_in_storage); - if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns()) + if (column_in_storage && column_in_storage->type->hasDynamicSubcolumnsDeprecated()) return true; } @@ -927,7 +926,8 @@ void StorageDistributed::read( sharding_key_expr, sharding_key_column_name, distributed_settings, - additional_shard_filter_generator); + additional_shard_filter_generator, + /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. if (!query_plan.isInitialized()) @@ -1986,9 +1986,18 @@ void registerStorageDistributed(StorageFactory & factory) bool StorageDistributed::initializeDiskOnConfigChange(const std::set & new_added_disks) { - if (!data_volume) + if (!storage_policy || !data_volume) return true; + auto new_storage_policy = getContext()->getStoragePolicy(storage_policy->getName()); + auto new_data_volume = new_storage_policy->getVolume(0); + if (new_storage_policy->getVolumes().size() > 1) + LOG_WARNING(log, "Storage policy for Distributed table has multiple volumes. " + "Only {} volume will be used to store data. 
Other will be ignored.", data_volume->getName()); + + std::atomic_store(&storage_policy, new_storage_policy); + std::atomic_store(&data_volume, new_data_volume); + for (auto & disk : data_volume->getDisks()) { if (new_added_disks.contains(disk->getName())) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 3a7e63aef50..85a8de86953 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -85,6 +85,7 @@ public: bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } StoragePolicyPtr getStoragePolicy() const override; diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index ae9bf2483e1..572dc07b269 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -26,6 +26,7 @@ public: } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool canMoveConditionsToPrewhere() const override { diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index d1968daa1f1..9d12a1569d8 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -252,4 +252,13 @@ AccessType StorageFactory::getSourceAccessType(const String & table_engine) cons return it->second.features.source_access_type; } + +const StorageFactory::StorageFeatures & StorageFactory::getStorageFeatures(const String & storage_name) const +{ + auto it = storages.find(storage_name); + if (it == storages.end()) + throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown table engine {}", storage_name); + return it->second.features; +} + } diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h index 50ace6aaad7..f3603419651 100644 --- a/src/Storages/StorageFactory.h +++ b/src/Storages/StorageFactory.h @@ -129,12 +129,8 @@ public: AccessType getSourceAccessType(const String & table_engine) const; - bool checkIfStorageSupportsSchemaInterface(const String & storage_name) - { - if (storages.contains(storage_name)) - return storages[storage_name].features.supports_schema_inference; - return false; - } + const StorageFeatures & getStorageFeatures(const String & storage_name) const; + private: Storages storages; }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 430e68d8562..6744159d5dc 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -224,7 +225,7 @@ void checkCreationIsAllowed( { auto table_path_stat = fs::status(table_path); if (fs::exists(table_path_stat) && fs::is_directory(table_path_stat)) - throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "File must not be a directory"); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "File {} must not be a directory", table_path); } } @@ -273,7 +274,7 @@ std::unique_ptr selectReadBuffer( if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || read_method == LocalFSReadMethod::mmap)) { if (use_table_fd) - res = std::make_unique(table_fd); + res = std::make_unique(table_fd, 
context->getSettingsRef().max_read_buffer_size); else res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); @@ -282,10 +283,7 @@ std::unique_ptr selectReadBuffer( else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) { #if USE_LIBURING - auto & reader = context->getIOURingReader(); - if (!reader.isSupported()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); - + auto & reader = getIOUringReaderOrThrow(context); res = std::make_unique( reader, Priority{}, @@ -298,7 +296,7 @@ std::unique_ptr selectReadBuffer( else { if (use_table_fd) - res = std::make_unique(table_fd); + res = std::make_unique(table_fd, context->getSettingsRef().max_read_buffer_size); else res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); @@ -1536,7 +1534,8 @@ private: void ReadFromFile::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 945ee4f369f..37da59c3664 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -90,6 +90,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; @@ -100,7 +102,7 @@ public: { std::vector paths_to_archives; std::string path_in_archive; // used when reading a single file from archive - IArchiveReader::NameFilter filter = {}; // used when files inside archive are defined with a glob + IArchiveReader::NameFilter filter; // used when files inside archive are defined with a glob bool isSingleFileRead() const { diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index 973d595bbf0..f5a4362901e 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -32,6 +32,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index fbfc67f4c7c..9950d41f1c2 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -458,7 +458,7 @@ void fuzzJSONObject( void fuzzJSONObject(std::shared_ptr n, WriteBuffer & out, const StorageFuzzJSON::Configuration & config, pcg64 & rnd) { size_t node_count = 0; - return fuzzJSONObject(n, out, config, rnd, /*depth*/ 0, node_count); + fuzzJSONObject(n, out, config, rnd, /*depth*/ 0, node_count); } class FuzzJSONSource : public ISource diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index fbce6c2bb7d..2190e012c5b 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -30,12 +31,9 @@ #include #include #include -#include #include -#include - namespace DB { @@ -269,6 +267,9 @@ ColumnPtr fillColumnWithRandomData( case TypeIndex::Tuple: { auto elements = 
typeid_cast(type.get())->getElements(); + if (elements.empty()) + return ColumnTuple::create(limit); + const size_t tuple_size = elements.size(); Columns tuple_columns(tuple_size); @@ -639,7 +640,7 @@ void registerStorageGenerateRandom(StorageFactory & factory) Pipe StorageGenerateRandom::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, @@ -682,7 +683,14 @@ Pipe StorageGenerateRandom::read( pcg64 generate(random_seed); for (UInt64 i = 0; i < num_streams; ++i) - pipes.emplace_back(std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header, context)); + { + auto source = std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header, context); + + if (i == 0 && query_info.limit) + source->addTotalRowsApprox(query_info.limit); + + pipes.emplace_back(std::move(source)); + } return Pipe::unitePipes(std::move(pipes)); } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 1ac739f03fd..a5bae0acce5 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -628,7 +628,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( @@ -676,7 +676,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const auto * provided_column_type = it->getMapped(); const auto * available_column_type = jt->getMapped(); - if (!provided_column_type->hasDynamicSubcolumns() + if (!provided_column_type->hasDynamicSubcolumnsDeprecated() && !provided_column_type->equals(*available_column_type) && !isCompatibleEnumTypes(available_column_type, provided_column_type)) throw Exception( @@ -720,7 +720,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const listOfColumns(available_columns)); const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index b0b7afdfe8d..20f99070000 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -846,7 +846,7 @@ void StorageKeeperMap::restoreDataImpl( bool allow_non_empty_tables, const DiskPtr & temporary_disk) { - auto table_id = toString(getStorageID().uuid); + const auto & table_id = toString(getStorageID().uuid); fs::path data_path_in_backup_fs = data_path_in_backup; @@ -960,7 +960,7 @@ std::optional StorageKeeperMap::isTableValid() const { std::lock_guard lock{init_mutex}; if (table_is_valid.has_value()) - return *table_is_valid; + return table_is_valid; [&] { diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index b652750346f..08e0526550d 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include 
#include #include @@ -254,7 +254,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu if (!deserialize_states.contains(name)) { settings.getter = create_stream_getter(true); - serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]); + serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name], nullptr); } settings.getter = create_stream_getter(false); @@ -833,8 +833,7 @@ Pipe StorageLog::read( size_t num_marks = marks_with_real_row_count.size(); size_t max_streams = use_marks_file ? num_marks : 1; - if (num_streams > max_streams) - num_streams = max_streams; + num_streams = std::min(num_streams, max_streams); std::vector offsets; offsets.resize(num_data_files, 0); diff --git a/src/Storages/StorageLoop.cpp b/src/Storages/StorageLoop.cpp new file mode 100644 index 00000000000..2062749e60b --- /dev/null +++ b/src/Storages/StorageLoop.cpp @@ -0,0 +1,49 @@ +#include "StorageLoop.h" +#include +#include +#include + + +namespace DB +{ + namespace ErrorCodes + { + + } + StorageLoop::StorageLoop( + const StorageID & table_id_, + StoragePtr inner_storage_) + : IStorage(table_id_) + , inner_storage(std::move(inner_storage_)) + { + StorageInMemoryMetadata storage_metadata = inner_storage->getInMemoryMetadata(); + setInMemoryMetadata(storage_metadata); + } + + + void StorageLoop::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) + { + query_info.optimize_trivial_count = false; + + query_plan.addStep(std::make_unique( + column_names, query_info, storage_snapshot, context, processed_stage, inner_storage, max_block_size, num_streams + )); + } + + void registerStorageLoop(StorageFactory & factory) + { + factory.registerStorage("Loop", [](const StorageFactory::Arguments & args) + { + StoragePtr inner_storage; + return std::make_shared(args.table_id, inner_storage); + }); + } +} diff --git a/src/Storages/StorageLoop.h b/src/Storages/StorageLoop.h new file mode 100644 index 00000000000..48760b169c2 --- /dev/null +++ b/src/Storages/StorageLoop.h @@ -0,0 +1,33 @@ +#pragma once +#include "config.h" +#include + + +namespace DB +{ + + class StorageLoop final : public IStorage + { + public: + StorageLoop( + const StorageID & table_id, + StoragePtr inner_storage_); + + std::string getName() const override { return "Loop"; } + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return false; } + + private: + StoragePtr inner_storage; + }; +} diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 696865dfa2f..735f51e1f32 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -58,7 +59,7 @@ static inline String generateInnerTableName(const StorageID & view_id) return ".inner." 
+ view_id.getTableName(); } -/// Remove columns from target_header that does not exists in src_header +/// Remove columns from target_header that does not exist in src_header static void removeNonCommonColumns(const Block & src_header, Block & target_header) { std::set target_only_positions; @@ -100,6 +101,7 @@ StorageMaterializedView::StorageMaterializedView( if (query.sql_security) storage_metadata.setSQLSecurity(query.sql_security->as()); + /// Materialized view doesn't support SQL SECURITY INVOKER. if (storage_metadata.sql_security_type == SQLSecurityType::INVOKER) throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "SQL SECURITY INVOKER can't be specified for MATERIALIZED VIEW"); @@ -150,6 +152,9 @@ StorageMaterializedView::StorageMaterializedView( } else { + const String & engine = query.storage->engine->name; + const auto & storage_features = StorageFactory::instance().getStorageFeatures(engine); + /// We will create a query to create an internal table. auto create_context = Context::createCopy(local_context); auto manual_create_query = std::make_shared(); @@ -159,6 +164,22 @@ StorageMaterializedView::StorageMaterializedView( auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); + if (storage_features.supports_skipping_indices) + { + if (query.columns_list->indices) + new_columns_list->set(new_columns_list->indices, query.columns_list->indices->ptr()); + if (query.columns_list->constraints) + new_columns_list->set(new_columns_list->constraints, query.columns_list->constraints->ptr()); + if (query.columns_list->primary_key) + new_columns_list->set(new_columns_list->primary_key, query.columns_list->primary_key->ptr()); + if (query.columns_list->primary_key_from_columns) + new_columns_list->set(new_columns_list->primary_key_from_columns, query.columns_list->primary_key_from_columns->ptr()); + } + if (storage_features.supports_projections) + { + if (query.columns_list->projections) + new_columns_list->set(new_columns_list->projections, query.columns_list->projections->ptr()); + } manual_create_query->set(manual_create_query->columns_list, new_columns_list); manual_create_query->set(manual_create_query->storage, query.storage->ptr()); @@ -172,6 +193,7 @@ StorageMaterializedView::StorageMaterializedView( if (query.refresh_strategy) { + fixed_uuid = false; refresher = RefreshTask::create( *this, getContext(), @@ -219,8 +241,10 @@ void StorageMaterializedView::read( context->checkAccess(AccessType::SELECT, getInMemoryMetadataPtr()->select.select_table_id, column_names); auto storage_id = storage->getStorageID(); + + /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` /// We don't need to check access if the inner table was created automatically. 
- if (!has_inner_table && !storage_id.empty()) + if (!has_inner_table && !storage_id.empty() && getInMemoryMetadataPtr()->sql_security_type) context->checkAccess(AccessType::SELECT, storage_id, column_names); storage->read(query_plan, column_names, target_storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); @@ -230,10 +254,10 @@ void StorageMaterializedView::read( auto mv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, context, processed_stage); auto target_header = query_plan.getCurrentDataStream().header; - /// No need to convert columns that does not exists in MV + /// No need to convert columns that does not exist in MV removeNonCommonColumns(mv_header, target_header); - /// No need to convert columns that does not exists in the result header. + /// No need to convert columns that does not exist in the result header. /// /// Distributed storage may process query up to the specific stage, and /// so the result header may not include all the columns from the @@ -268,8 +292,10 @@ SinkToStoragePtr StorageMaterializedView::write(const ASTPtr & query, const Stor auto metadata_snapshot = storage->getInMemoryMetadataPtr(); auto storage_id = storage->getStorageID(); + + /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` /// We don't need to check access if the inner table was created automatically. - if (!has_inner_table && !storage_id.empty()) + if (!has_inner_table && !storage_id.empty() && getInMemoryMetadataPtr()->sql_security_type) { auto query_sample_block = InterpreterInsertQuery::getSampleBlock(query->as(), storage, metadata_snapshot, context); context->checkAccess(AccessType::INSERT, storage_id, query_sample_block.getNames()); @@ -597,7 +623,7 @@ void StorageMaterializedView::backupData(BackupEntriesCollector & backup_entries void StorageMaterializedView::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) { if (hasInnerTable()) - return getTargetTable()->restoreDataFromBackup(restorer, data_path_in_backup, partitions); + getTargetTable()->restoreDataFromBackup(restorer, data_path_in_backup, partitions); } bool StorageMaterializedView::supportsBackupPartition() const @@ -662,10 +688,14 @@ void StorageMaterializedView::onActionLockRemove(StorageActionBlockType action_t refresher->start(); } -DB::StorageID StorageMaterializedView::getTargetTableId() const +StorageID StorageMaterializedView::getTargetTableId() const { std::lock_guard guard(target_table_id_mutex); - return target_table_id; + auto id = target_table_id; + /// TODO: Avoid putting uuid into target_table_id in the first place, instead of clearing it here. 
+ if (!fixed_uuid) + id.uuid = UUIDHelpers::Nil; + return id; } void StorageMaterializedView::setTargetTableId(DB::StorageID id) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 198b7a642ee..5ecd2ec3819 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -32,6 +32,7 @@ public: bool supportsFinal() const override { return getTargetTable()->supportsFinal(); } bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); } bool supportsSubcolumns() const override { return getTargetTable()->supportsSubcolumns(); } + bool supportsDynamicSubcolumns() const override { return getTargetTable()->supportsDynamicSubcolumns(); } bool supportsTransactions() const override { return getTargetTable()->supportsTransactions(); } SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; @@ -110,6 +111,10 @@ private: bool has_inner_table = false; + /// If false, inner table is replaced on each refresh. In that case, target_table_id doesn't + /// have UUID, and we do inner table lookup by name instead. + bool fixed_uuid = true; + friend class RefreshTask; void checkStatementCanBeForwarded() const; diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 50581aa0d61..5d269cf814d 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -60,6 +60,7 @@ public: bool supportsParallelInsert() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } /// Smaller blocks (e.g. 64K rows) are better for CPU cache. 
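The getTargetTableId() change just above, together with the new fixed_uuid flag, makes refreshable materialized views resolve their inner table by name: the inner table is dropped and re-created on every refresh, so a cached UUID would point at a stale incarnation, and clearing it forces lookups to fall back to the stable name. Below is a minimal, hedged sketch of that pattern only, not the actual ClickHouse implementation: StorageID here is a hypothetical simplified stand-in (a plain string in place of a real UUID and of UUIDHelpers::Nil), and MaterializedViewSketch is an illustrative name.

#include <mutex>
#include <string>

// Hypothetical, simplified stand-in for a table identifier: a table is addressed
// either by UUID (stable identity) or, when the UUID is empty, by its name.
struct StorageID
{
    std::string database_name;
    std::string table_name;
    std::string uuid;   // empty string plays the role of "no UUID" (UUIDHelpers::Nil)
};

class MaterializedViewSketch
{
public:
    StorageID getTargetTableId() const
    {
        std::lock_guard guard(target_table_id_mutex);
        StorageID id = target_table_id;
        // For refreshable views the inner table is swapped on each refresh, so any
        // remembered UUID may already be stale; drop it and resolve by name instead.
        if (!fixed_uuid)
            id.uuid.clear();
        return id;
    }

private:
    mutable std::mutex target_table_id_mutex;
    StorageID target_table_id;
    bool fixed_uuid = true;   // set to false when the view has a REFRESH strategy
};

The design choice mirrored here is to keep the stored id untouched and only strip the UUID at the point of use, which avoids races with the refresh task that replaces the inner table.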
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 5eceddfe06d..4c678a1228b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -472,7 +472,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu { size_t tables_count = selected_tables.size(); Float64 num_streams_multiplier = std::min( - static_cast(tables_count), + tables_count, std::max(1UL, static_cast(context->getSettingsRef().max_streams_multiplier_for_merge_tables))); size_t num_streams = static_cast(requested_num_streams * num_streams_multiplier); @@ -519,7 +519,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ size_t tables_count = selected_tables.size(); Float64 num_streams_multiplier - = std::min(static_cast(tables_count), std::max(1UL, static_cast(context->getSettingsRef().max_streams_multiplier_for_merge_tables))); + = std::min(tables_count, std::max(1UL, static_cast(context->getSettingsRef().max_streams_multiplier_for_merge_tables))); size_t num_streams = static_cast(requested_num_streams * num_streams_multiplier); size_t remaining_streams = num_streams; @@ -1622,7 +1622,7 @@ void ReadFromMerge::applyFilters(const QueryPlan & plan, const ActionDAGNodes & void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(added_filter_nodes); filterTablesAndCreateChildrenPlans(); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index a63ea1e32ef..735c8711a63 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -49,6 +49,7 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } bool supportsPrewhere() const override { return tableSupportsPrewhere(); } std::optional supportedPrewhereColumns() const override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index da36d045fc8..27a76f4f21d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -153,7 +153,7 @@ void StorageMergeTree::startup() { background_operations_assignee.start(); startBackgroundMovesIfNeeded(); - startOutdatedDataPartsLoadingTask(); + startOutdatedAndUnexpectedDataPartsLoadingTask(); } catch (...) 
{ @@ -179,7 +179,7 @@ void StorageMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; - stopOutdatedDataPartsLoadingTask(); + stopOutdatedAndUnexpectedDataPartsLoadingTask(); /// Unlock all waiting mutations { @@ -235,16 +235,11 @@ void StorageMergeTree::read( = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - ClusterProxy::SelectStreamFactory select_stream_factory = - ClusterProxy::SelectStreamFactory( - header, - {}, - storage_snapshot, - processed_stage); - ClusterProxy::executeQueryWithParallelReplicas( query_plan, - select_stream_factory, + getStorageID(), + header, + processed_stage, modified_query_ast, local_context, query_info.storage_limits); @@ -338,17 +333,21 @@ void StorageMergeTree::alter( auto table_id = getStorageID(); auto old_storage_settings = getSettings(); + const auto & query_settings = local_context->getSettingsRef(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); - auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, local_context->getSettingsRef().materialize_ttl_after_modify, local_context); + auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, query_settings.materialize_ttl_after_modify, local_context); if (!maybe_mutation_commands.empty()) delayMutationOrThrowIfNeeded(nullptr, local_context); Int64 mutation_version = -1; commands.apply(new_metadata, local_context); + if (!query_settings.allow_suspicious_primary_key) + MergeTreeData::verifySortingKey(new_metadata.sorting_key); + /// This alter can be performed at new_metadata level only if (commands.isSettingsAlter()) { @@ -401,7 +400,7 @@ void StorageMergeTree::alter( resetObjectColumnsFromActiveParts(parts_lock); } - if (!maybe_mutation_commands.empty() && local_context->getSettingsRef().alter_sync > 0) + if (!maybe_mutation_commands.empty() && query_settings.alter_sync > 0) waitForMutation(mutation_version, false); } @@ -2120,16 +2119,36 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - my_metadata_snapshot, - clone_params, - local_context->getReadSettings(), - local_context->getWriteSettings()); - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); + if (replace) + { + /// Replace can only work on the same disk + auto [dst_part, part_lock] = cloneAndLoadDataPart( + src_part, + TMP_PREFIX, + dst_part_info, + my_metadata_snapshot, + clone_params, + local_context->getReadSettings(), + local_context->getWriteSettings(), + true/*must_on_same_disk*/); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + /// Attach can work on another disk + auto [dst_part, part_lock] = cloneAndLoadDataPart( + src_part, + TMP_PREFIX, + dst_part_info, + my_metadata_snapshot, + clone_params, + local_context->getReadSettings(), + local_context->getWriteSettings(), + false/*must_on_same_disk*/); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } } /// ATTACH empty part set @@ -2234,14 +2253,15 @@ void 
StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const .copy_instead_of_hardlink = getSettings()->always_use_copy_instead_of_hardlinks, }; - auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( + auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPart( src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params, local_context->getReadSettings(), - local_context->getWriteSettings() + local_context->getWriteSettings(), + true/*must_on_same_disk*/ ); dst_parts.emplace_back(std::move(dst_part)); diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp index 4747232d7f7..0b1ad02f8c9 100644 --- a/src/Storages/StorageMergeTreeIndex.cpp +++ b/src/Storages/StorageMergeTreeIndex.cpp @@ -280,7 +280,8 @@ private: void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 64bb2f4609b..62a2a048642 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -192,7 +192,7 @@ private: else if (which.isFloat32()) document.add(name, static_cast(column.getFloat32(idx))); else if (which.isFloat64()) - document.add(name, static_cast(column.getFloat64(idx))); + document.add(name, column.getFloat64(idx)); else if (which.isDate()) document.add(name, Poco::Timestamp(DateLUT::instance().fromDayNum(DayNum(column.getUInt(idx))) * 1000000)); else if (which.isDateTime()) diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index f7ee936db8d..74abf931f8f 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -48,6 +48,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, bool) override { return std::make_shared(metadata_snapshot->getSampleBlock()); diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index a8e7fd528dd..5cd86f7ad2c 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -62,7 +62,7 @@ public: size_t max_block_size, size_t num_streams) override { - return getNested()->read(query_plan, column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); + getNested()->read(query_plan, column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); } SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 837ff68417f..e18e66d7af9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -187,7 +186,6 @@ namespace ErrorCodes extern const int NOT_INITIALIZED; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int TABLE_IS_DROPPED; - extern const int CANNOT_BACKUP_TABLE; extern const int 
SUPPORT_IS_DISABLED; extern const int FAULT_INJECTED; extern const int CANNOT_FORGET_PARTITION; @@ -310,8 +308,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( true, /// require_part_metadata mode, [this] (const std::string & name) { enqueuePartForCheck(name); }) - , zookeeper_name(zkutil::extractZooKeeperName(zookeeper_path_)) - , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ mode <= LoadingStrictnessLevel::CREATE, log.load())) + , full_zookeeper_path(zookeeper_path_) + , zookeeper_name(zkutil::extractZooKeeperName(full_zookeeper_path)) + , zookeeper_path(zkutil::extractZooKeeperPath(full_zookeeper_path, /* check_starts_with_slash */ mode <= LoadingStrictnessLevel::CREATE, log.load())) , replica_name(replica_name_) , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) @@ -591,6 +590,9 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( LOG_DEBUG(log, "Waiting for {} to apply mutation {}", replica, mutation_id); zkutil::EventPtr wait_event = std::make_shared(); + constexpr size_t MAX_RETRIES_ON_FAILED_MUTATION = 30; + size_t retries_on_failed_mutation = 0; + while (!partial_shutdown_called) { /// Mutation maybe killed or whole replica was deleted. @@ -638,18 +640,32 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( } } - /// If mutation status is empty, than local replica may just not loaded it into memory. - if (mutation_status && !mutation_status->latest_fail_reason.empty()) - { - LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); - break; - } - /// Replica can become inactive, so wait with timeout, if nothing happened -> recheck it if (!wait_event->tryWait(1000)) { LOG_TRACE(log, "Failed to wait for mutation '{}', will recheck", mutation_id); } + + /// If mutation status is empty, than local replica may just not loaded it into memory. + if (mutation_status && !mutation_status->latest_fail_reason.empty()) + { + LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); + + /// In some cases latest_fail_reason may be retryable and there's a chance it will be cleared after the next attempt + if (++retries_on_failed_mutation <= MAX_RETRIES_ON_FAILED_MUTATION) + continue; + + if (mutation_status->is_done) + { + LOG_DEBUG(log, "Looks like mutation {} is done, rechecking", mutation_id); + continue; + } + + /// It's still possible that latest_fail_reason will be cleared just before queue.getIncompleteMutationsStatus(...) below, + /// but it's unlikely. Anyway, rethrow the exception here to avoid exiting with is_done=false + checkMutationStatus(mutation_status, {mutation_id}); + throw Exception(ErrorCodes::LOGICAL_ERROR, "checkMutationStatus didn't throw when checking status of {}: {}", mutation_id, mutation_status->latest_fail_reason); + } } /// This replica inactive, don't check anything @@ -1576,18 +1592,12 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) * But actually we can't precisely determine that ALL missing parts * covered by this unexpected part. So missing parts will be downloaded. 
*/ - DataParts unexpected_parts; - /// Intersection of local parts and expected parts ActiveDataPartSet local_expected_parts_set(format_version); - /// Collect unexpected parts for (const auto & part : parts) { - if (expected_parts.contains(part->name)) - local_expected_parts_set.add(part->name); - else - unexpected_parts.insert(part); /// this parts we will place to detached with ignored_ prefix + local_expected_parts_set.add(part->name); } /// Which parts should be taken from other replicas. @@ -1599,18 +1609,15 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); + waitForUnexpectedPartsToBeLoaded(); + ActiveDataPartSet set_of_empty_unexpected_parts(format_version); - for (const auto & part : parts) + for (const auto & load_state : unexpected_data_parts) { - if (part->rows_count || part->getState() != MergeTreeDataPartState::Active || expected_parts.contains(part->name)) + if (load_state.is_broken || load_state.part->rows_count || !load_state.uncovered) continue; - if (incomplete_list_of_outdated_parts) - { - LOG_INFO(log, "Outdated parts are not loaded yet, but we may need them to handle dropped parts. Need retry."); - return false; - } - set_of_empty_unexpected_parts.add(part->name); + set_of_empty_unexpected_parts.add(load_state.part->name); } if (auto empty_count = set_of_empty_unexpected_parts.size()) LOG_WARNING(log, "Found {} empty unexpected parts (probably some dropped parts were not cleaned up before restart): [{}]", @@ -1629,33 +1636,35 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) std::unordered_set restorable_unexpected_parts; UInt64 uncovered_unexpected_parts_rows = 0; - for (const auto & part : unexpected_parts) + for (const auto & load_state : unexpected_data_parts) { - unexpected_parts_rows += part->rows_count; + if (load_state.is_broken) + continue; + unexpected_parts_rows += load_state.part->rows_count; /// This part may be covered by some expected part that is active and present locally /// Probably we just did not remove this part from disk before restart (but removed from ZooKeeper) - String covering_local_part = local_expected_parts_set.getContainingPart(part->name); + String covering_local_part = local_expected_parts_set.getContainingPart(load_state.part->name); if (!covering_local_part.empty()) { - covered_unexpected_parts.push_back(part->name); + covered_unexpected_parts.push_back(load_state.part->name); continue; } - String covering_empty_part = set_of_empty_unexpected_parts.getContainingPart(part->name); + String covering_empty_part = set_of_empty_unexpected_parts.getContainingPart(load_state.part->name); if (!covering_empty_part.empty()) { LOG_INFO(log, "Unexpected part {} is covered by empty part {}, assuming it has been dropped just before restart", - part->name, covering_empty_part); - covered_unexpected_parts.push_back(part->name); + load_state.part->name, covering_empty_part); + covered_unexpected_parts.push_back(load_state.part->name); continue; } - auto covered_parts = local_expected_parts_set.getPartInfosCoveredBy(part->info); + auto covered_parts = local_expected_parts_set.getPartInfosCoveredBy(load_state.part->info); - if (MergeTreePartInfo::areAllBlockNumbersCovered(part->info, covered_parts)) + if (MergeTreePartInfo::areAllBlockNumbersCovered(load_state.part->info, covered_parts)) { - restorable_unexpected_parts.insert(part->name); + restorable_unexpected_parts.insert(load_state.part->name); continue; } @@ 
-1669,13 +1678,13 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) } /// Part is unexpected and we don't have covering part: it's suspicious - uncovered_unexpected_parts.insert(part->name); - uncovered_unexpected_parts_rows += part->rows_count; + uncovered_unexpected_parts.insert(load_state.part->name); + uncovered_unexpected_parts_rows += load_state.part->rows_count; - if (part->info.level > 0) + if (load_state.part->info.level > 0) { ++unexpected_parts_nonnew; - unexpected_parts_nonnew_rows += part->rows_count; + unexpected_parts_nonnew_rows += load_state.part->rows_count; } } @@ -1701,6 +1710,9 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) UInt64 total_rows_on_filesystem = 0; for (const auto & part : parts) total_rows_on_filesystem += part->rows_count; + /// We need to sum the rows count of all unexpected data parts; + for (const auto & part : unexpected_data_parts) + total_rows_on_filesystem += part.part->rows_count; const auto storage_settings_ptr = getSettings(); bool insane = uncovered_unexpected_parts_rows > total_rows_on_filesystem * storage_settings_ptr->replicated_max_ratio_of_wrong_parts; @@ -1742,13 +1754,12 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) /// Add to the queue jobs to pick up the missing parts from other replicas and remove from ZK the information that we have them. queue.setBrokenPartsToEnqueueFetchesOnLoading(std::move(parts_to_fetch)); - /// Remove extra local parts. - for (const DataPartPtr & part : unexpected_parts) + /// detached all unexpected data parts after sanity check. + for (auto & part_state : unexpected_data_parts) { - bool restore_covered = restorable_unexpected_parts.contains(part->name) || uncovered_unexpected_parts.contains(part->name); - LOG_ERROR(log, "Renaming unexpected part {} to ignored_{}{}", part->name, part->name, restore_covered ? ", restoring covered parts" : ""); - forcefullyMovePartToDetachedAndRemoveFromMemory(part, "ignored", restore_covered); + part_state.part->renameToDetached("ignored"); } + unexpected_data_parts.clear(); return true; } @@ -1984,7 +1995,6 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo return {}; const MergeTreePartInfo actual_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); - const String part_new_name = actual_part_info.getPartNameV1(); for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { @@ -2793,7 +2803,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) auto obtain_part = [&] (PartDescriptionPtr & part_desc) { - /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPartOnSameDisk will do full copy. + /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPart(must_on_same_disk=true) will do full copy. /// It's okay to check the setting for current table and disk for the source table, because src and dst part are on the same disk. 
bool prefer_fetch_from_other_replica = !part_desc->replica.empty() && storage_settings_ptr->allow_remote_fs_zero_copy_replication && part_desc->src_table_part && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(); @@ -2812,14 +2822,15 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( + auto [res_part, temporary_part_lock] = cloneAndLoadDataPart( part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, clone_params, getContext()->getReadSettings(), - getContext()->getWriteSettings()); + getContext()->getWriteSettings(), + true/*must_on_same_disk*/); part_desc->res_part = std::move(res_part); part_desc->temporary_part_lock = std::move(temporary_part_lock); } @@ -3769,7 +3780,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() const Names deduplicate_by_columns = {}; CreateMergeEntryResult create_result = CreateMergeEntryResult::Other; - enum class AttemptStatus + enum class AttemptStatus : uint8_t { EntryCreated, NeedRetry, @@ -3917,12 +3928,9 @@ void StorageReplicatedMergeTree::mergeSelectingTask() else if (result == AttemptStatus::CannotSelect) new_sleep_ms *= storage_settings_ptr->merge_selecting_sleep_slowdown_factor; new_sleep_ms *= std::uniform_real_distribution(1.f, 1.1f)(thread_local_rng); - merge_selecting_sleep_ms = static_cast(new_sleep_ms); - - if (merge_selecting_sleep_ms < storage_settings_ptr->merge_selecting_sleep_ms) - merge_selecting_sleep_ms = storage_settings_ptr->merge_selecting_sleep_ms; - if (merge_selecting_sleep_ms > storage_settings_ptr->max_merge_selecting_sleep_ms) - merge_selecting_sleep_ms = storage_settings_ptr->max_merge_selecting_sleep_ms; + merge_selecting_sleep_ms = std::clamp(static_cast(new_sleep_ms), + storage_settings_ptr->merge_selecting_sleep_ms, + storage_settings_ptr->max_merge_selecting_sleep_ms); if (result == AttemptStatus::EntryCreated) merge_selecting_task->schedule(); @@ -4893,14 +4901,15 @@ bool StorageReplicatedMergeTree::fetchPart( .keep_metadata_version = true, }; - auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk( + auto [cloned_part, lock] = cloneAndLoadDataPart( part_to_clone, "tmp_clone_", part_info, metadata_snapshot, clone_params, getContext()->getReadSettings(), - getContext()->getWriteSettings()); + getContext()->getWriteSettings(), + true/*must_on_same_disk*/); part_directory_lock = std::move(lock); return cloned_part; @@ -5138,7 +5147,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart( void StorageReplicatedMergeTree::startup() { LOG_TRACE(log, "Starting up table"); - startOutdatedDataPartsLoadingTask(); + startOutdatedAndUnexpectedDataPartsLoadingTask(); if (attach_thread) { attach_thread->start(); @@ -5341,7 +5350,7 @@ void StorageReplicatedMergeTree::shutdown(bool) } session_expired_callback_handler.reset(); - stopOutdatedDataPartsLoadingTask(); + stopOutdatedAndUnexpectedDataPartsLoadingTask(); partialShutdown(); @@ -5451,12 +5460,11 @@ void StorageReplicatedMergeTree::read( /// 2. Do not read parts that have not yet been written to the quorum of the replicas. /// For this you have to synchronously go to ZooKeeper. 
if (settings.select_sequential_consistency) - return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); - - if (local_context->canUseParallelReplicasOnInitiator()) - return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage); - - readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); + readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); + else if (local_context->canUseParallelReplicasOnInitiator()) + readParallelReplicasImpl(query_plan, column_names, query_info, local_context, processed_stage); + else + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); } void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( @@ -5481,13 +5489,13 @@ void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( void StorageReplicatedMergeTree::readParallelReplicasImpl( QueryPlan & query_plan, const Names & /*column_names*/, - const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum processed_stage) { ASTPtr modified_query_ast; Block header; + const auto table_id = getStorageID(); if (local_context->getSettingsRef().allow_experimental_analyzer) { @@ -5501,22 +5509,17 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( } else { - const auto table_id = getStorageID(); modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); header = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( - header, - {}, - storage_snapshot, - processed_stage); - ClusterProxy::executeQueryWithParallelReplicas( query_plan, - select_stream_factory, + table_id, + header, + processed_stage, modified_query_ast, local_context, query_info.storage_limits); @@ -6043,6 +6046,7 @@ void StorageReplicatedMergeTree::alter( assertNotReadonly(); auto table_id = getStorageID(); + const auto & query_settings = query_context->getSettingsRef(); if (commands.isSettingsAlter()) { @@ -6070,6 +6074,13 @@ void StorageReplicatedMergeTree::alter( return; } + if (!query_settings.allow_suspicious_primary_key) + { + StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); + commands.apply(future_metadata, query_context); + + MergeTreeData::verifySortingKey(future_metadata.sorting_key); + } auto ast_to_str = [](ASTPtr query) -> String { @@ -6202,7 +6213,7 @@ void StorageReplicatedMergeTree::alter( auto maybe_mutation_commands = commands.getMutationCommands( *current_metadata, - query_context->getSettingsRef().materialize_ttl_after_modify, + query_settings.materialize_ttl_after_modify, query_context); bool have_mutation = !maybe_mutation_commands.empty(); @@ -6325,7 +6336,7 @@ void StorageReplicatedMergeTree::alter( { LOG_DEBUG(log, "Metadata changes applied. 
Will wait for data changes."); merge_selecting_task->schedule(); - waitMutation(*mutation_znode, query_context->getSettingsRef().alter_sync); + waitMutation(*mutation_znode, query_settings.alter_sync); LOG_DEBUG(log, "Data changes applied."); } } @@ -7164,8 +7175,7 @@ void StorageReplicatedMergeTree::getReplicaDelays(time_t & out_absolute_delay, t break; } - if (replica_time > max_replicas_unprocessed_insert_time) - max_replicas_unprocessed_insert_time = replica_time; + max_replicas_unprocessed_insert_time = std::max(replica_time, max_replicas_unprocessed_insert_time); } if (have_replica_with_nothing_unprocessed) @@ -7766,7 +7776,7 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(PartsToRemo for (const auto & part : parts) part_names_to_remove.emplace_back(part.getPartName()); - return removePartsFromZooKeeperWithRetries(part_names_to_remove, max_retries); + removePartsFromZooKeeperWithRetries(part_names_to_remove, max_retries); } void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries) @@ -8101,17 +8111,37 @@ void StorageReplicatedMergeTree::replacePartitionFrom( .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - metadata_snapshot, - clone_params, - query_context->getReadSettings(), - query_context->getWriteSettings()); + if (replace) + { + /// Replace can only work on the same disk + auto [dst_part, part_lock] = cloneAndLoadDataPart( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings(), + true/*must_on_same_disk*/); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + /// Attach can work on another disk + auto [dst_part, part_lock] = cloneAndLoadDataPart( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings(), + false/*must_on_same_disk*/); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } src_parts.emplace_back(src_part); - dst_parts.emplace_back(dst_part); - dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); @@ -8367,14 +8397,15 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = dest_metadata_snapshot->getMetadataVersion() }; - auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( + auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPart( src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params, query_context->getReadSettings(), - query_context->getWriteSettings()); + query_context->getWriteSettings(), + true/*must_on_same_disk*/); src_parts.emplace_back(src_part); dst_parts.emplace_back(dst_part); @@ -8628,9 +8659,9 @@ void StorageReplicatedMergeTree::getCommitPartOps( const String & 
block_id_path) const { if (block_id_path.empty()) - return getCommitPartOps(ops, part, std::vector()); + getCommitPartOps(ops, part, std::vector()); else - return getCommitPartOps(ops, part, std::vector({block_id_path})); + getCommitPartOps(ops, part, std::vector({block_id_path})); } void StorageReplicatedMergeTree::getCommitPartOps( @@ -9245,24 +9276,6 @@ void StorageReplicatedMergeTree::createTableSharedID() const } -std::optional StorageReplicatedMergeTree::tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) -{ - auto zk_path = tryExtractZkPathFromCreateQuery(create_query, global_context); - if (!zk_path) - return {}; - - String zk_name = zkutil::extractZooKeeperName(*zk_path); - zk_path = zkutil::extractZooKeeperPath(*zk_path, false, nullptr); - zkutil::ZooKeeperPtr zookeeper = (zk_name == getDefaultZooKeeperName()) ? global_context->getZooKeeper() : global_context->getAuxiliaryZooKeeper(zk_name); - - String id; - if (!zookeeper->tryGet(fs::path(*zk_path) / "table_shared_id", id)) - return {}; - - return id; -} - - zkutil::EphemeralNodeHolderPtr StorageReplicatedMergeTree::lockSharedDataTemporary(const String & part_name, const String & part_id, const DiskPtr & disk) const { auto settings = getSettings(); @@ -10422,21 +10435,10 @@ void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_quer auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, current_metadata).checkAndFindDiff(metadata_from_entry, current_metadata->getColumns(), getContext()); auto adjusted_metadata = metadata_diff.getNewMetadata(columns_from_entry, getContext(), *current_metadata); applyMetadataChangesToCreateQuery(create_query, adjusted_metadata); - - /// Check that tryGetTableSharedIDFromCreateQuery() works for this storage. - auto actual_table_shared_id = getTableSharedID(); - auto expected_table_shared_id = tryGetTableSharedIDFromCreateQuery(*create_query, getContext()); - if (actual_table_shared_id != expected_table_shared_id) - { - throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, "Table {} has its shared ID different from one from the create query: " - "actual shared id = {}, expected shared id = {}, create query = {}", - getStorageID().getNameForLogs(), actual_table_shared_id, expected_table_shared_id.value_or("nullopt"), - create_query); - } } catch (...) { - /// We can continue making a backup with non-adjusted name. + /// We can continue making a backup with non-adjusted query. tryLogCurrentException(log, "Failed to adjust the create query of this table for backup"); } } @@ -10462,8 +10464,8 @@ void StorageReplicatedMergeTree::backupData( auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, read_settings, local_context); auto coordination = backup_entries_collector.getBackupCoordination(); - String shared_id = getTableSharedID(); - coordination->addReplicatedDataPath(shared_id, data_path_in_backup); + + coordination->addReplicatedDataPath(full_zookeeper_path, data_path_in_backup); using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum; std::vector part_names_with_hashes; @@ -10472,7 +10474,7 @@ void StorageReplicatedMergeTree::backupData( part_names_with_hashes.emplace_back(PartNameAndChecksum{part_backup_entries.part_name, part_backup_entries.part_checksum}); /// Send our list of part names to the coordination (to compare with other replicas). 
- coordination->addReplicatedPartNames(shared_id, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); + coordination->addReplicatedPartNames(full_zookeeper_path, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); /// Send a list of mutations to the coordination too (we need to find the mutations which are not finished for added part names). { @@ -10514,25 +10516,25 @@ void StorageReplicatedMergeTree::backupData( } if (!mutation_infos.empty()) - coordination->addReplicatedMutations(shared_id, getStorageID().getFullTableName(), getReplicaName(), mutation_infos); + coordination->addReplicatedMutations(full_zookeeper_path, getStorageID().getFullTableName(), getReplicaName(), mutation_infos); } } /// This task will be executed after all replicas have collected their parts and the coordination is ready to /// give us the final list of parts to add to the BackupEntriesCollector. - auto post_collecting_task = [shared_id, + auto post_collecting_task = [my_full_zookeeper_path = full_zookeeper_path, my_replica_name = getReplicaName(), coordination, my_parts_backup_entries = std::move(parts_backup_entries), &backup_entries_collector]() { - Strings data_paths = coordination->getReplicatedDataPaths(shared_id); + Strings data_paths = coordination->getReplicatedDataPaths(my_full_zookeeper_path); std::vector data_paths_fs; data_paths_fs.reserve(data_paths.size()); for (const auto & data_path : data_paths) data_paths_fs.push_back(data_path); - Strings part_names = coordination->getReplicatedPartNames(shared_id, my_replica_name); + Strings part_names = coordination->getReplicatedPartNames(my_full_zookeeper_path, my_replica_name); std::unordered_set part_names_set{part_names.begin(), part_names.end()}; for (const auto & part_backup_entries : my_parts_backup_entries) @@ -10545,7 +10547,7 @@ void StorageReplicatedMergeTree::backupData( } } - auto mutation_infos = coordination->getReplicatedMutations(shared_id, my_replica_name); + auto mutation_infos = coordination->getReplicatedMutations(my_full_zookeeper_path, my_replica_name); for (const auto & mutation_info : mutation_infos) { auto backup_entry = ReplicatedMergeTreeMutationEntry::parse(mutation_info.entry, mutation_info.id).backup(); @@ -10559,8 +10561,7 @@ void StorageReplicatedMergeTree::backupData( void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) { - String full_zk_path = getZooKeeperName() + getZooKeeperPath(); - if (!restorer.getRestoreCoordination()->acquireInsertingDataIntoReplicatedTable(full_zk_path)) + if (!restorer.getRestoreCoordination()->acquireInsertingDataIntoReplicatedTable(full_zookeeper_path)) { /// Other replica is already restoring the data of this table. /// We'll get them later due to replication, it's not necessary to read it from the backup. diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c472c11e7f8..f96206ce657 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -307,7 +307,7 @@ public: /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const; - inline const String & getReplicaName() const { return replica_name; } + const String & getReplicaName() const { return replica_name; } /// Restores table metadata if ZooKeeper lost it. 
/// Used only on restarted readonly replicas (not checked). All active (Active) parts are moved to detached/ @@ -330,17 +330,14 @@ public: // Return default or custom zookeeper name for table const String & getZooKeeperName() const { return zookeeper_name; } - const String & getZooKeeperPath() const { return zookeeper_path; } + const String & getFullZooKeeperPath() const { return full_zookeeper_path; } // Return table id, common for different replicas String getTableSharedID() const override; std::map getUnfinishedMutationCommands() const override; - /// Returns the same as getTableSharedID(), but extracts it from a create query. - static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); - static const String & getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. @@ -420,9 +417,11 @@ private: bool is_readonly_metric_set = false; + const String full_zookeeper_path; static const String default_zookeeper_name; const String zookeeper_name; const String zookeeper_path; + const String replica_name; const String replica_path; @@ -567,7 +566,6 @@ private: void readParallelReplicasImpl( QueryPlan & query_plan, const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum processed_stage); @@ -725,7 +723,7 @@ private: * Call when merge_selecting_mutex is locked. * Returns false if any part is not in ZK. */ - enum class CreateMergeEntryResult { Ok, MissingPart, LogUpdated, Other }; + enum class CreateMergeEntryResult : uint8_t { Ok, MissingPart, LogUpdated, Other }; CreateMergeEntryResult createLogEntryToMergeParts( zkutil::ZooKeeperPtr & zookeeper, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp deleted file mode 100644 index 3fe0b66a453..00000000000 --- a/src/Storages/StorageS3.cpp +++ /dev/null @@ -1,2019 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#include -#pragma clang diagnostic pop - -namespace fs = std::filesystem; - - -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; - extern const Metric StorageS3ThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -static const std::unordered_set required_configuration_keys = { - "url", -}; -static const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "compression_method", - "structure", - "access_key_id", - "secret_access_key", - "session_token", - "filename", - "use_environment_credentials", - "max_single_read_retries", - "min_upload_part_size", - "upload_part_size_multiply_factor", - "upload_part_size_multiply_parts_count_threshold", - 
"max_single_part_upload_size", - "max_connections", - "expiration_window_seconds", - "no_sign_request" -}; - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_TEXT; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int S3_ERROR; - extern const int UNEXPECTED_EXPRESSION; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int FILE_DOESNT_EXIST; - extern const int NO_ELEMENTS_IN_CONFIG; -} - - -class ReadFromStorageS3Step : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromStorageS3Step"; } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromStorageS3Step( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - StorageS3 & storage_, - ReadFromFormatInfo read_from_format_info_, - bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , column_names(column_names_) - , storage(storage_) - , read_from_format_info(std::move(read_from_format_info_)) - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - query_configuration = storage.updateConfigurationAndGetCopy(context); - virtual_columns = storage.getVirtualsList(); - } - -private: - Names column_names; - StorageS3 & storage; - ReadFromFormatInfo read_from_format_info; - bool need_only_count; - StorageS3::Configuration query_configuration; - NamesAndTypesList virtual_columns; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - - -class IOutputFormat; -using OutputFormatPtr = std::shared_ptr; - -class StorageS3Source::DisclosedGlobIterator::Impl : WithContext -{ -public: - Impl( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : WithContext(context_) - , client(client_.clone()) - , globbed_uri(globbed_uri_) - , virtual_columns(virtual_columns_) - , read_keys(read_keys_) - , request_settings(request_settings_) - , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) - , file_progress_callback(file_progress_callback_) - { - if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - - const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == globbed_uri.key.size()) - { - buffer.emplace_back(std::make_shared(globbed_uri.key, std::nullopt)); - buffer_iter = buffer.begin(); - is_finished = true; - return; - } - - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error()); - - recursive = globbed_uri.key == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); - } - - KeyWithInfoPtr next(size_t) - { - std::lock_guard lock(mutex); - return nextAssumeLocked(); - } - - size_t objectsCount() - { - return buffer.size(); - } - - ~Impl() - { - list_objects_pool.wait(); - } - -private: - using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - - KeyWithInfoPtr nextAssumeLocked() - { - do - { - if (buffer_iter != buffer.end()) - { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) - { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); - } - - return answer; - } - - if (is_finished) - return {}; - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) - { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. - is_finished = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - } while (true); - } - - void fillInternalBufferAssumeLocked() - { - buffer.clear(); - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - { - buffer_iter = buffer.begin(); - return; - } - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer = std::move(temp_buffer); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - - /// Set iterator only after the whole batch is processed - buffer_iter = buffer.begin(); - - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - } - - std::future listObjectsAsync() - { - return list_objects_scheduler([this] - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); - - /// Outcome failure will be handled on the caller side. - if (outcome.IsSuccess()) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return outcome; - }, Priority{}); - } - - std::mutex mutex; - - KeysWithInfo buffer; - KeysWithInfo::iterator buffer_iter; - - std::unique_ptr client; - S3::URI globbed_uri; - ASTPtr query; - NamesAndTypesList virtual_columns; - ActionsDAGPtr filter_dag; - std::unique_ptr matcher; - bool recursive{false}; - bool is_finished{false}; - KeysWithInfo * read_keys; - - S3::ListObjectsV2Request request; - S3Settings::RequestSettings request_settings; - - ThreadPool list_objects_pool; - ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; - std::future outcome_future; - std::function file_progress_callback; -}; - -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : pimpl(std::make_shared(client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -class StorageS3Source::KeysIterator::Impl -{ -public: - explicit Impl( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys_, - std::function file_progress_callback_) - : keys(keys_) - , 
client(client_.clone()) - , version_id(version_id_) - , bucket(bucket_) - , request_settings(request_settings_) - , file_progress_callback(file_progress_callback_) - { - if (read_keys_) - { - for (const auto & key : keys) - read_keys_->push_back(std::make_shared(key)); - } - } - - KeyWithInfoPtr next(size_t) - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - auto key = keys[current_index]; - std::optional info; - if (file_progress_callback) - { - info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); - file_progress_callback(FileProgress(0, info->size)); - } - - return std::make_shared(key, info); - } - - size_t objectsCount() - { - return keys.size(); - } - -private: - Strings keys; - std::atomic_size_t index = 0; - std::unique_ptr client; - String version_id; - String bucket; - S3Settings::RequestSettings request_settings; - std::function file_progress_callback; -}; - -StorageS3Source::KeysIterator::KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, - read_keys, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::KeysIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -StorageS3Source::ReadTaskIterator::ReadTaskIterator( - const DB::ReadTaskCallback & callback_, - size_t max_threads_count) - : callback(callback_) -{ - ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "S3ReadTaskItr"); - - std::vector> keys; - keys.reserve(max_threads_count); - for (size_t i = 0; i < max_threads_count; ++i) - keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); - - pool.wait(); - buffer.reserve(max_threads_count); - for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= buffer.size()) - return std::make_shared(callback()); - - while (current_index < buffer.size()) - { - if (const auto & key_info = buffer[current_index]; key_info && !key_info->key.empty()) - return buffer[current_index]; - - current_index = index.fetch_add(1, std::memory_order_relaxed); - } - - return nullptr; -} - -size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() -{ - return buffer.size(); -} - -StorageS3Source::StorageS3Source( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket_, - const String & version_id_, - const String & url_host_and_port_, - std::shared_ptr file_iterator_, - const size_t max_parsing_threads_, - bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) - , 
WithContext(context_) - , name(std::move(name_)) - , bucket(bucket_) - , version_id(version_id_) - , url_host_and_port(url_host_and_port_) - , format(format_) - , columns_desc(info.columns_description) - , requested_columns(info.requested_columns) - , max_block_size(max_block_size_) - , request_settings(request_settings_) - , compression_hint(std::move(compression_hint_)) - , client(client_) - , sample_block(info.format_header) - , format_settings(format_settings_) - , requested_virtual_columns(info.requested_virtual_columns) - , file_iterator(file_iterator_) - , max_parsing_threads(max_parsing_threads_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) -{ -} - -void StorageS3Source::lazyInitialize(size_t idx) -{ - if (initialized) - return; - - reader = createReader(idx); - if (reader) - reader_future = createReaderAsync(idx); - initialized = true; -} - -StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) -{ - KeyWithInfoPtr key_with_info; - do - { - key_with_info = file_iterator->next(idx); - if (!key_with_info || key_with_info->key.empty()) - return {}; - - if (!key_with_info->info) - key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings); - } - while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(*key_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - auto compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer(key_with_info->key, key_with_info->info->size); - - auto input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - getContext(), - max_block_size, - format_settings, - max_parsing_threads, - /* max_download_threads= */ std::nullopt, - /* is_remote_fs */ true, - compression_method, - need_only_count); - - if (key_condition) - input_format->setKeyCondition(key_condition); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. 
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageS3Source::createReaderAsync(size_t idx) -{ - return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); -} - -std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. - if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size); - } - - return std::make_unique( - client, bucket, key, version_id, request_settings, read_settings, - /*use_external_buffer*/ false, /*offset_*/ 0, /*read_until_position_*/ 0, - /*restricted_seek_*/ false, object_size); -} - -std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto context = getContext(); - auto read_buffer_creator = - [this, read_settings, object_size] - (bool restricted_seek, const StoredObject & object) -> std::unique_ptr - { - return std::make_unique( - client, - bucket, - object.remote_path, - version_id, - request_settings, - read_settings, - /* use_external_buffer */true, - /* offset */0, - /* read_until_position */0, - restricted_seek, - object_size); - }; - - auto modified_settings{read_settings}; - /// User's S3 object may change, don't cache it. 
- modified_settings.use_page_cache_for_disks_without_file_cache = false; - - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - - auto s3_impl = std::make_unique( - std::move(read_buffer_creator), - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, - "", - read_settings, - /* cache_log */nullptr, /* use_external_buffer */true); - - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, - context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -StorageS3Source::~StorageS3Source() -{ - create_reader_pool.wait(); -} - -String StorageS3Source::getName() const -{ - return name; -} - -Chunk StorageS3Source::generate() -{ - lazyInitialize(); - - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize()); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getFile(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageS3Source::addNumRowsToCache(const String & key, size_t num_rows) -{ - String source = fs::path(url_host_and_port) / bucket / key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info) -{ - String source = fs::path(url_host_and_port) / bucket / key_with_info.key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - return key_with_info.info->last_modification_time; - }; - - return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class StorageS3Sink : public SinkToStorage -{ -public: - StorageS3Sink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - const StorageS3::Configuration & configuration_, - const String & bucket, - const String & key) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - BlobStorageLogWriterPtr blob_log = nullptr; - if (auto blob_storage_log = context->getBlobStorageLog()) - { - blob_log = std::make_shared(std::move(blob_storage_log)); - blob_log->query_id = context->getCurrentQueryId(); - } - - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - configuration_.client, - bucket, - key, - DBMS_DEFAULT_BUFFER_SIZE, - configuration_.request_settings, - std::move(blob_log), - std::nullopt, - threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "S3ParallelWrite"), - context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer - = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageS3Sink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf.reset(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number) - { - if (context->getSettingsRef().s3_truncate_on_insert || !S3::objectExists(*configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings)) - return std::nullopt; - - if (context->getSettingsRef().s3_create_new_file_on_insert) - { - auto pos = key.find_first_of('.'); - String new_key; - do - { - new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); - ++sequence_number; - } - while (S3::objectExists(*configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings)); - - return new_key; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.url.bucket, key); - } -} - - -class PartitionedStorageS3Sink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageS3Sink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - const StorageS3::Configuration & configuration_, - const String & bucket_, - const String & key_) - : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , configuration(configuration_) - , bucket(bucket_) - , key(key_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_bucket = replaceWildcards(bucket, partition_id); - validateBucket(partition_bucket); - - auto partition_key = replaceWildcards(key, partition_id); - validateKey(partition_key); - - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(getContext(), configuration, partition_key, /* sequence_number */1)) - partition_key = *new_key; - - return std::make_shared( - format, - sample_block, - getContext(), - format_settings, - compression_method, - configuration, - partition_bucket, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - const StorageS3::Configuration configuration; - const String bucket; - const String key; - const std::optional format_settings; - - static void validateBucket(const String & str) - { - S3::URI::validateBucket(str, {}); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); - - validatePartitionKey(str, false); - } - - static void validateKey(const String & str) - { - /// See: - /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html - /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject - - if (str.empty() || str.size() > 1024) - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); - - validatePartitionKey(str, true); - } -}; - - -StorageS3::StorageS3( - const Configuration & configuration_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , configuration(configuration_) - , name(configuration.url.storage_name) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(configuration, format_settings, context_); - else - columns = getTableStructureFromData(configuration, format_settings, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(configuration, format_settings, context_).second; - - /// We don't allow special columns in S3 storage. 
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -static std::shared_ptr createFileIterator( - const StorageS3::Configuration & configuration, - bool distributed_processing, - ContextPtr local_context, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - StorageS3::KeysWithInfo * read_keys = nullptr, - std::function file_progress_callback = {}) -{ - if (distributed_processing) - { - return std::make_shared(local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_shared( - *configuration.client, configuration.url, predicate, virtual_columns, - local_context, read_keys, configuration.request_settings, file_progress_callback); - } - else - { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) - { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); - } - - return std::make_shared( - *configuration.client, configuration.url.version_id, keys, - configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback); - } -} - -bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageS3::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -void StorageS3::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - *this, - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - 
createIterator(predicate); -} - -void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - iterator_wrapper = createFileIterator( - query_configuration, storage.distributed_processing, context, predicate, - virtual_columns, nullptr, context->getFileProgressCallback()); -} - -void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if (storage.partition_by && query_configuration.withWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); - - createIterator(nullptr); - - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. - num_streams = 1; - - const auto & settings = context->getSettingsRef(); - const size_t max_parsing_threads = num_streams >= settings.max_parsing_threads ? 1 : (settings.max_parsing_threads / std::max(num_streams, 1ul)); - LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); - - Pipes pipes; - pipes.reserve(num_streams); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - read_from_format_info, - query_configuration.format, - storage.getName(), - context, - storage.format_settings, - max_block_size, - query_configuration.request_settings, - query_configuration.compression_method, - query_configuration.client, - query_configuration.url.bucket, - query_configuration.url.version_id, - query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), - iterator_wrapper, - max_parsing_threads, - need_only_count); - - source->setKeyCondition(filter_actions_dag, context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(read_from_format_info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - auto key = query_configuration.keys.front(); - - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } - else - { - if (query_configuration.withGlobs()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, configuration, query_configuration.keys.front(), query_configuration.keys.size())) - { - query_configuration.keys.push_back(*new_key); - configuration.keys.push_back(*new_key); - key = *new_key; - } - - return std::make_shared( - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } -} - -void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - if (query_configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", - query_configuration.url.key); - } - - Aws::S3::Model::Delete delkeys; - - for (const auto & key : query_configuration.keys) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(key); - delkeys.AddObjects(std::move(obj)); - } - - ProfileEvents::increment(ProfileEvents::S3DeleteObjects); - S3::DeleteObjectsRequest request; - request.SetBucket(query_configuration.url.bucket); - request.SetDelete(delkeys); - - auto response = query_configuration.client->DeleteObjects(request); - - const auto * response_error = response.IsSuccess() ? 
nullptr : &response.GetError(); - auto time_now = std::chrono::system_clock::now(); - if (auto blob_storage_log = BlobStorageLogWriter::create()) - { - for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - } - - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw S3Exception(err.GetMessage(), err.GetErrorType()); - } - - for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); -} - -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); - return configuration; -} - -void StorageS3::updateConfiguration(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); -} - -void StorageS3::useConfiguration(const Configuration & new_configuration) -{ - std::lock_guard lock(configuration_update_mutex); - configuration = new_configuration; -} - -const StorageS3::Configuration & StorageS3::getConfiguration() -{ - std::lock_guard lock(configuration_update_mutex); - return configuration; -} - -bool StorageS3::Configuration::update(const ContextPtr & context) -{ - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); - - if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) - return false; - - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - connect(context); - return true; -} - -void StorageS3::Configuration::connect(const ContextPtr & context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - if (S3::isS3ExpressEndpoint(url.endpoint) && auth_settings.region.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets"); - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - /// seems as we don't use it - client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.connectTimeoutMs = local_settings.s3_connect_timeout_ms; - client_configuration.http_keep_alive_timeout = S3::DEFAULT_KEEP_ALIVE_TIMEOUT; - client_configuration.http_keep_alive_max_requests = S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS; - - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - 
.disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint), - }; - - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); - client = S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - S3::CredentialsConfiguration{ - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or( - context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }, - credentials.GetSessionToken()); -} - -void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - auto filename = collection.getOrDefault("filename", ""); - if (!filename.empty()) - configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); - else - configuration.url = S3::URI(collection.get("url")); - - configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); - configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); - configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); - configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); - configuration.auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); - - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); - configuration.structure = collection.getOrDefault("structure", "auto"); - - configuration.request_settings = S3Settings::RequestSettings(collection); -} - -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) -{ - StorageS3::Configuration configuration; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - } - else - { - /// Supported signatures: - /// - /// S3('url') - /// S3('url', 'format') - /// S3('url', 'format', 'compression') - /// S3('url', NOSIGN) - /// S3('url', NOSIGN, 'format') - /// S3('url', NOSIGN, 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') - /// S3('url', 
'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') - /// with optional headers() function - - size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); - - if (count == 0 || count > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage S3 requires 1 to 5 arguments: " - "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); - - std::unordered_map engine_args_to_idx; - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - engine_args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 2 possible variants: - /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, access_key_id, secret_access_key, format) - /// - s3(source, NOSIGN, format, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - } - else - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - } - /// For 5 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (count == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; - } - else - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - } - else if (count == 6) - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; - } - - /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(engine_args[0], "url")); - - if (engine_args_to_idx.contains("format")) - configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format"); - - if (engine_args_to_idx.contains("compression_method")) - configuration.compression_method = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); - - if (engine_args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); - - if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); - - if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - if (no_sign_request) - configuration.auth_settings.no_sign_request = no_sign_request; - } - - configuration.static_configuration = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); - - configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.url.key).value_or("auto"); - - return configuration; -} - -ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration.format, configuration, format_settings, ctx).first; -} - -std::pair StorageS3::getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - return 
getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - std::optional format_, - const std::optional & format_settings_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key_with_info : read_keys) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->key)) - { - format = format_from_file_name; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - while (true) - { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. You can specify table structure manually", - *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. You can specify the format manually"); - } - - return {nullptr, std::nullopt, format}; - } - - /// S3 file iterator could get new keys after new iteration - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->key)) - { - format = format_from_file_name; - break; - } - } - } - - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache, format}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->key; - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_key_with_info); - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, 
configuration.compression_method), zstd_window_log_max); - } - - private: - std::optional tryGetColumnsFromCache( - const StorageS3::KeysWithInfo::const_iterator & begin, - const StorageS3::KeysWithInfo::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] - { - time_t last_modification_time = 0; - if ((*it)->info) - { - last_modification_time = (*it)->info->last_modification_time; - } - else - { - /// Note that in case of exception in getObjectInfo returned info will be empty, - /// but schema cache will handle this case and won't return columns from cache - /// because we can't say that it's valid without last modification time. - last_modification_time = S3::getObjectInfo( - *configuration.client, - configuration.url.bucket, - (*it)->key, - configuration.url.version_id, - configuration.request_settings, - /*with_metadata=*/ false, - /*throw_on_error= */ false).last_modification_time; - } - - return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; - }; - - String path = fs::path(configuration.url.bucket) / (*it)->key; - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; - }; - -} - -std::pair StorageS3::getTableStructureAndFormatFromDataImpl( - std::optional format, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - KeysWithInfo read_keys; - - auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format, format_settings, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -void registerStorageS3Impl(const String & name, StorageFactory & factory) -{ - factory.registerStorage(name, [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. 
- user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - std::move(configuration), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing_ */false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} - -void registerStorageS3(StorageFactory & factory) -{ - return registerStorageS3Impl("S3", factory); -} - -void registerStorageCOS(StorageFactory & factory) -{ - return registerStorageS3Impl("COSN", factory); -} - -void registerStorageOSS(StorageFactory & factory) -{ - return registerStorageS3Impl("OSS", factory); -} - -bool StorageS3::supportsPartitionBy() const -{ - return true; -} - -SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h deleted file mode 100644 index c8ab28fb20e..00000000000 --- a/src/Storages/StorageS3.h +++ /dev/null @@ -1,397 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -class PullingPipelineExecutor; -class NamedCollection; - -class StorageS3Source : public SourceWithKeyCondition, WithContext -{ -public: - - struct KeyWithInfo - { - KeyWithInfo() = default; - - explicit KeyWithInfo(String key_, std::optional info_ = std::nullopt) - : key(std::move(key_)), info(std::move(info_)) {} - - virtual ~KeyWithInfo() = default; - - String key; - std::optional info; - }; - using KeyWithInfoPtr = std::shared_ptr; - - using KeysWithInfo = std::vector; - - class IIterator - { - public: - virtual ~IIterator() = default; - virtual KeyWithInfoPtr next(size_t idx = 0) = 0; /// NOLINT - - /// Estimates how many streams we need to process all files. - /// If keys count >= max_threads_count, the returned number may not represent the actual number of the keys. - /// Intended to be called before any next() calls, may underestimate otherwise - /// fixme: May underestimate if the glob has a strong filter, so there are few matches among the first 1000 ListObjects results. 
- virtual size_t estimatedKeysCount() = 0; - - KeyWithInfoPtr operator ()() { return next(); } - }; - - class DisclosedGlobIterator : public IIterator - { - public: - DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - const ContextPtr & context, - KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class KeysIterator : public IIterator - { - public: - explicit KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys = nullptr, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class ReadTaskIterator : public IIterator - { - public: - explicit ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - KeysWithInfo buffer; - std::atomic_size_t index = 0; - - ReadTaskCallback callback; - }; - - StorageS3Source( - const ReadFromFormatInfo & info, - const String & format, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket, - const String & version_id, - const String & url_host_and_port, - std::shared_ptr file_iterator_, - size_t max_parsing_threads, - bool need_only_count_); - - ~StorageS3Source() override; - - String getName() const override; - - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override - { - setKeyConditionImpl(filter_actions_dag, context_, sample_block); - } - - Chunk generate() override; - -private: - friend class StorageS3QueueSource; - - String name; - String bucket; - String version_id; - String url_host_and_port; - String format; - ColumnsDescription columns_desc; - NamesAndTypesList requested_columns; - UInt64 max_block_size; - S3Settings::RequestSettings request_settings; - String compression_hint; - std::shared_ptr client; - Block sample_block; - std::optional format_settings; - - struct ReaderHolder - { - public: - ReaderHolder( - KeyWithInfoPtr key_with_info_, - String bucket_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - std::unique_ptr pipeline_, - std::unique_ptr reader_) - : key_with_info(key_with_info_) - , bucket(std::move(bucket_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. 
- /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - key_with_info = std::move(other.key_with_info); - bucket = std::move(other.bucket); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - String getPath() const { return fs::path(bucket) / key_with_info->key; } - const String & getFile() const { return key_with_info->key; } - const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; } - std::optional getFileSize() const { return key_with_info->info ? std::optional(key_with_info->info->size) : std::nullopt; } - - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - KeyWithInfoPtr key_with_info; - String bucket; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - NamesAndTypesList requested_virtual_columns; - std::shared_ptr file_iterator; - size_t max_parsing_threads = 1; - bool need_only_count; - - LoggerPtr log = getLogger("StorageS3Source"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; - std::future reader_future; - std::atomic initialized{false}; - - size_t total_rows_in_file = 0; - - /// Notice: we should initialize reader and future_reader lazily in generate to make sure key_condition - /// is set before createReader is invoked for key_condition is read in createReader. - void lazyInitialize(size_t idx = 0); - - /// Recreate ReadBuffer and Pipeline for each file. - ReaderHolder createReader(size_t idx = 0); - std::future createReaderAsync(size_t idx = 0); - - std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); - - void addNumRowsToCache(const String & key, size_t num_rows); - std::optional tryGetNumRowsFromCache(const KeyWithInfo & key_with_info); -}; - -/** - * This class represents table engine for external S3 urls. - * It sends HTTP GET to server when select is called and - * HTTP PUT when insert is called. - */ -class StorageS3 : public IStorage -{ -public: - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - String getPath() const { return url.key; } - - bool update(const ContextPtr & context); - - void connect(const ContextPtr & context); - - bool withGlobs() const { return url.key.find_first_of("*?{") != std::string::npos; } - - bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos - || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; - } - - S3::URI url; - S3::AuthSettings auth_settings; - S3Settings::RequestSettings request_settings; - /// If s3 configuration was passed from ast, then it is static. - /// If from config - it can be changed with config reload. - bool static_configuration = true; - /// Headers from ast is a part of static configuration. 
- HTTPHeaderEntries headers_from_ast; - - std::shared_ptr client; - std::vector keys; - }; - - StorageS3( - const Configuration & configuration_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_ = false, - ASTPtr partition_by_ = nullptr); - - String getName() const override - { - return name; - } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; - - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - bool supportsPartitionBy() const override; - - static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static StorageS3::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true); - - static ColumnsDescription getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - static std::pair getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - using KeysWithInfo = StorageS3Source::KeysWithInfo; - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -protected: - virtual Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context); - - virtual void updateConfiguration(const ContextPtr & local_context); - - void useConfiguration(const Configuration & new_configuration); - - const Configuration & getConfiguration(); - -private: - friend class StorageS3Cluster; - friend class TableFunctionS3Cluster; - friend class StorageS3Queue; - friend class ReadFromStorageS3Step; - - Configuration configuration; - std::mutex configuration_update_mutex; - - String name; - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; - - static std::pair getTableStructureAndFormatFromDataImpl( - std::optional format, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; -}; - -} - -#endif diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp deleted file mode 100644 index 6b22771b38f..00000000000 --- a/src/Storages/StorageS3Cluster.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#include "Storages/StorageS3Cluster.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern 
const int LOGICAL_ERROR; -} - -StorageS3Cluster::StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration & configuration_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")")) - , s3_configuration{configuration_} -{ - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); - context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - updateConfigurationIfChanged(context); - - if (columns_.empty()) - { - ColumnsDescription columns; - /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function - if (s3_configuration.format == "auto") - std::tie(columns, s3_configuration.format) = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context); - else - columns = StorageS3::getTableStructureFromData(s3_configuration, /*format_settings=*/std::nullopt, context); - - storage_metadata.setColumns(columns); - } - else - { - if (s3_configuration.format == "auto") - s3_configuration.format = StorageS3::getTableStructureAndFormatFromData(s3_configuration, /*format_settings=*/std::nullopt, context).second; - - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -void StorageS3Cluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionS3Cluster::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, - storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), - s3_configuration.format, - context); -} - -void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) -{ - s3_configuration.update(local_context); -} - -RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, predicate, getVirtualsList(), context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); - - auto callback = std::make_shared>([iterator]() mutable -> String - { - if (auto next = iterator->next()) - return next->key; - return ""; - }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -} - -#endif diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h deleted file mode 100644 index 802fd3f9139..00000000000 --- a/src/Storages/StorageS3Cluster.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageS3Cluster : public IStorageCluster -{ -public: - StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration & configuration_, - 
const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const ContextPtr & context_); - - std::string getName() const override { return "S3Cluster"; } - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - -protected: - void updateConfigurationIfChanged(ContextPtr local_context); - -private: - void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); } - - void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; - - StorageS3::Configuration s3_configuration; -}; - - -} - -#endif diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 04634bcf1b3..b767805f637 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -18,18 +18,20 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; } -S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings, bool validate_settings) { - updateFromSettingsImpl(settings, false); - validate(); + updateFromSettings(settings, false); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : PartUploadSettings(settings) + String setting_name_prefix, + bool validate_settings) + : PartUploadSettings(settings, validate_settings) { String key = config_prefix + "." 
+ setting_name_prefix; strict_upload_part_size = config.getUInt64(key + "strict_upload_part_size", strict_upload_part_size); @@ -46,7 +48,8 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( storage_class_name = config.getString(config_prefix + ".s3_storage_class", storage_class_name); storage_class_name = Poco::toUpperInPlace(storage_class_name); - validate(); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection) @@ -65,7 +68,7 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedC validate(); } -void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +void S3Settings::RequestSettings::PartUploadSettings::updateFromSettings(const Settings & settings, bool if_changed) { if (!if_changed || settings.s3_strict_upload_part_size.changed) strict_upload_part_size = settings.s3_strict_upload_part_size; @@ -108,7 +111,7 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() if (max_upload_part_size > max_upload_part_size_limit) throw Exception( ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_upload_part_size has invalid value {} which is grater than the s3 API limit {}", + "Setting max_upload_part_size has invalid value {} which is greater than the s3 API limit {}", ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); if (max_single_part_upload_size > max_upload_part_size_limit) @@ -170,8 +173,8 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() } -S3Settings::RequestSettings::RequestSettings(const Settings & settings) - : upload_settings(settings) +S3Settings::RequestSettings::RequestSettings(const Settings & settings, bool validate_settings) + : upload_settings(settings, validate_settings) { updateFromSettingsImpl(settings, false); } @@ -190,8 +193,9 @@ S3Settings::RequestSettings::RequestSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : upload_settings(config, config_prefix, settings, setting_name_prefix) + String setting_name_prefix, + bool validate_settings) + : upload_settings(config, config_prefix, settings, setting_name_prefix, validate_settings) { String key = config_prefix + "." 
+ setting_name_prefix; max_single_read_retries = config.getUInt64(key + "max_single_read_retries", settings.s3_max_single_read_retries); @@ -262,13 +266,12 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin request_timeout_ms = settings.s3_request_timeout_ms; } -void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) +void S3Settings::RequestSettings::updateFromSettingsIfChanged(const Settings & settings) { updateFromSettingsImpl(settings, true); - upload_settings.updateFromSettings(settings); + upload_settings.updateFromSettings(settings, true); } - void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) { std::lock_guard lock(mutex); @@ -292,7 +295,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const +std::optional StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 0f972db02b1..c3bc8aa6ed6 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -39,20 +39,19 @@ struct S3Settings size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024; String storage_class_name; - void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); } + void updateFromSettings(const Settings & settings, bool if_changed); void validate(); private: PartUploadSettings() = default; - explicit PartUploadSettings(const Settings & settings); + explicit PartUploadSettings(const Settings & settings, bool validate_settings = true); explicit PartUploadSettings(const NamedCollection & collection); PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); - - void updateFromSettingsImpl(const Settings & settings, bool if_changed); + String setting_name_prefix = {}, + bool validate_settings = true); friend struct RequestSettings; }; @@ -80,7 +79,7 @@ struct S3Settings void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } RequestSettings() = default; - explicit RequestSettings(const Settings & settings); + explicit RequestSettings(const Settings & settings, bool validate_settings = true); explicit RequestSettings(const NamedCollection & collection); /// What's the setting_name_prefix, and why do we need it? 
@@ -94,9 +93,10 @@ struct S3Settings const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); + String setting_name_prefix = {}, + bool validate_settings = true); - void updateFromSettings(const Settings & settings); + void updateFromSettingsIfChanged(const Settings & settings); private: void updateFromSettingsImpl(const Settings & settings, bool if_changed); @@ -112,7 +112,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; + std::optional getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; private: mutable std::mutex mutex; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 54218351cf1..a8c8e81e23d 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -130,7 +130,6 @@ StorageSetOrJoinBase::StorageSetOrJoinBase( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - if (relative_path_.empty()) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Join and Set storages require data path"); diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 8b087a4a2bc..aada25168f8 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -115,7 +115,7 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt { const auto & columns = getMetadataForQuery()->getColumns(); auto column = columns.tryGetColumn(options, column_name); - if (column && (!column->type->hasDynamicSubcolumns() || !options.with_extended_objects)) + if (column && (!column->type->hasDynamicSubcolumnsDeprecated() || !options.with_extended_objects)) return column; if (options.with_extended_objects) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index f47eeb60918..f0c5103d657 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -53,8 +54,13 @@ namespace ErrorCodes extern const int TIMEOUT_EXCEEDED; extern const int CANNOT_RESTORE_TABLE; extern const int NOT_IMPLEMENTED; + extern const int FAULT_INJECTED; } +namespace FailPoints +{ + extern const char stripe_log_sink_write_fallpoint[]; +} /// NOTE: The lock `StorageStripeLog::rwlock` is NOT kept locked while reading, /// because we read ranges of data that do not change. @@ -234,6 +240,11 @@ public: /// Save the new indices. storage.saveIndices(lock); + // While saving the file sizes an exception might occur, e.g. S3::TooManyRequests. + fiu_do_on(FailPoints::stripe_log_sink_write_fallpoint, + { + throw Exception(ErrorCodes::FAULT_INJECTED, "Injecting fault for inserting into StripeLog table"); + }); /// Save the new file sizes. 
storage.saveFileSizes(lock); @@ -371,8 +382,7 @@ Pipe StorageStripeLog::read( = std::make_shared(indices.extractIndexForColumns(NameSet{column_names.begin(), column_names.end()})); size_t size = indices_for_selected_columns->blocks.size(); - if (num_streams > size) - num_streams = size; + num_streams = std::min(num_streams, size); ReadSettings read_settings = local_context->getReadSettings(); Pipes pipes; diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 9d966fb899b..9507eb6ed8a 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -63,14 +63,6 @@ public: StoragePolicyPtr getStoragePolicy() const override { return nullptr; } bool storesDataOnDisk() const override { return false; } - String getName() const override - { - std::lock_guard lock{nested_mutex}; - if (nested) - return nested->getName(); - return StorageProxy::getName(); - } - void startup() override { } void shutdown(bool is_drop) override { diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 679946f9aee..8d1c6933503 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -457,7 +457,7 @@ std::pair> StorageURLSource: const auto settings = context_->getSettings(); - auto proxy_config = getProxyConfiguration(http_method); + auto proxy_config = getProxyConfiguration(request_uri.getScheme()); try { @@ -543,10 +543,11 @@ StorageURLSink::StorageURLSink( std::string content_type = FormatFactory::instance().getContentType(format, context, format_settings); std::string content_encoding = toContentEncodingName(compression_method); - auto proxy_config = getProxyConfiguration(http_method); + auto poco_uri = Poco::URI(uri); + auto proxy_config = getProxyConfiguration(poco_uri.getScheme()); auto write_buffer = std::make_unique( - HTTPConnectionGroupType::STORAGE, Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config + HTTPConnectionGroupType::STORAGE, poco_uri, http_method, content_type, content_encoding, headers, timeouts, DBMS_DEFAULT_BUFFER_SIZE, proxy_config ); const auto & settings = context->getSettingsRef(); @@ -1038,7 +1039,8 @@ private: void ReadFromURL::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1142,8 +1144,7 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate) return getFailoverOptions(next_uri, max_addresses); }); - if (num_streams > glob_iterator->size()) - num_streams = glob_iterator->size(); + num_streams = std::min(num_streams, glob_iterator->size()); } else { @@ -1327,6 +1328,7 @@ std::optional IStorageURLBase::tryGetLastModificationTime( .withBufSize(settings.max_read_buffer_size) .withRedirects(settings.max_http_get_redirects) .withHeaders(headers) + .withProxy(proxy_config) .create(credentials); return buf->tryGetLastModificationTime(); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 5aca3df1513..f550ccb2bc4 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -295,6 +295,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const 
StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index c80cdec74a2..a6334e7430d 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -35,6 +35,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index db568a1d5ab..016de94c17c 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -164,7 +164,7 @@ void StorageView::read( if (context->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(current_inner_query, getViewContext(context, storage_snapshot), options); + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, getViewContext(context, storage_snapshot), options, column_names); interpreter.addStorageLimits(*query_info.storage_limits); query_plan = std::move(interpreter).extractQueryPlan(); } diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index c3a2e726365..899c3d5cf40 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -47,7 +47,6 @@ add_library(clickhouse_storages_system ${storages_system_sources}) target_link_libraries(clickhouse_storages_system PRIVATE dbms common - string_utils clickhouse_common_zookeeper clickhouse_parsers Poco::JSON diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp index 53399654c8d..456b7c4f90b 100644 --- a/src/Storages/System/IStorageSystemOneBlock.cpp +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -91,7 +91,8 @@ void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8c6d29a3b70..49da1eba9ec 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -88,6 +88,7 @@ public: , total_tables(tables->size()), access(context->getAccess()) , query_id(context->getCurrentQueryId()), lock_acquire_timeout(context->getSettingsRef().lock_acquire_timeout) { + need_to_check_access_for_tables = !access->isGranted(AccessType::SHOW_COLUMNS); } String getName() const override { return "Columns"; } @@ -101,8 +102,6 @@ protected: MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); size_t rows_count = 0; - const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_COLUMNS); - while (rows_count < max_block_size && db_table_num < total_tables) { const std::string database_name = (*databases)[db_table_num].get(); @@ -138,13 +137,17 @@ protected: column_sizes = storage->getColumnSizes(); } - bool check_access_for_columns = check_access_for_tables && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name); + /// A shortcut: if we don't allow to list this table in SHOW TABLES, also exclude it from 
system.columns. + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + bool need_to_check_access_for_columns = need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name); size_t position = 0; for (const auto & column : columns) { ++position; - if (check_access_for_columns && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name, column.name)) + if (need_to_check_access_for_columns && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name, column.name)) continue; size_t src_index = 0; @@ -296,6 +299,7 @@ private: size_t db_table_num = 0; size_t total_tables; std::shared_ptr access; + bool need_to_check_access_for_tables; String query_id; std::chrono::milliseconds lock_acquire_timeout; }; @@ -338,7 +342,8 @@ private: void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } @@ -358,7 +363,6 @@ void StorageSystemColumns::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( @@ -416,9 +420,10 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, /// Add `table` column. MutableColumnPtr table_column_mut = ColumnString::create(); - IColumn::Offsets offsets(database_column->size()); + const auto num_databases = database_column->size(); + IColumn::Offsets offsets(num_databases); - for (size_t i = 0; i < database_column->size(); ++i) + for (size_t i = 0; i < num_databases; ++i) { const std::string database_name = (*database_column)[i].get(); if (database_name.empty()) diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 909599c00af..b42b070d518 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -48,6 +48,7 @@ const char * auto_contributors[] { "Alex Cao", "Alex Cheng", "Alex Karo", + "Alex Katsman", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", @@ -101,6 +102,7 @@ const char * auto_contributors[] { "Alexey Korepanov", "Alexey Milovidov", "Alexey Perevyshin", + "Alexey Petrunyaka", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", @@ -109,6 +111,7 @@ const char * auto_contributors[] { "AlfVII", "Alfonso Martinez", "Alfred Xu", + "Ali", "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", @@ -250,6 +253,7 @@ const char * auto_contributors[] { "Brian Hunter", "Brokenice0415", "Bulat Gaifullin", + "Caio Ricciuti", "Camden Cheek", "Camilo Sierra", "Carbyn", @@ -384,6 +388,7 @@ const char * auto_contributors[] { "Evgenii Pravda", "Evgeniia Sudarikova", "Evgeniy Gatov", + "Evgeniy Leko", "Evgeniy Udodov", "Evgeny", "Evgeny Konkov", @@ -413,6 +418,7 @@ const char * auto_contributors[] { "Fille", "Flowyi", "Francisco Barón", + "Francisco Javier Jurado Moreno", "Frank Chen", "Frank Zhao", "François Violette", @@ -425,6 +431,7 @@ const char * auto_contributors[] { "G5.Qin", "Gabriel", "Gabriel Archer", + "Gabriel Martinez", "Gagan Arneja", "Gagan Goel", "Gao Qiang", @@ -446,6 +453,7 @@ const char * auto_contributors[] { "Grigory Buteyko", "Grigory 
Pervakov", "GruffGemini", + "Grégoire Pineau", "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", @@ -587,6 +595,7 @@ const char * auto_contributors[] { "Keiji Yoshida", "Ken Chen", "Ken MacInnis", + "KenL", "Kenji Noguchi", "Kerry Clendinning", "Kevin Chiang", @@ -640,6 +649,7 @@ const char * auto_contributors[] { "Leonardo Maciel", "Leonid Krylov", "Leopold Schabel", + "Leticia Webb", "Lev Borodin", "Lewinma", "Li Shuai", @@ -701,6 +711,7 @@ const char * auto_contributors[] { "Masha", "Mathieu Rey", "Matthew Peveler", + "Mattias Naarttijärvi", "Matwey V. Kornilov", "Max", "Max Akhmedov", @@ -711,6 +722,7 @@ const char * auto_contributors[] { "MaxTheHuman", "MaxWk", "Maxim Akhmedov", + "Maxim Alexeev", "Maxim Babenko", "Maxim Fedotov", "Maxim Fridental", @@ -739,6 +751,7 @@ const char * auto_contributors[] { "Michael Razuvaev", "Michael Schnerring", "Michael Smitasin", + "Michael Stetsyuk", "Michail Safronov", "Michal Lisowski", "MicrochipQ", @@ -879,6 +892,7 @@ const char * auto_contributors[] { "Pavlo Bashynskiy", "Pawel Rog", "Paweł Kudzia", + "Pazitiff9", "Peignon Melvyn", "Peng Jian", "Peng Liu", @@ -1084,6 +1098,7 @@ const char * auto_contributors[] { "Tom Bombadil", "Tom Risse", "Tomas Barton", + "Tomer Shafir", "Tomáš Hromada", "Tristan", "Tsarkova Anastasia", @@ -1123,6 +1138,7 @@ const char * auto_contributors[] { "Victor Krasnov", "Victor Tarnavsky", "Viktor Taranenko", + "Vinay Suryadevara", "Vincent", "Vincent Bernat", "Vitalii S", @@ -1162,6 +1178,9 @@ const char * auto_contributors[] { "Vladislav Smirnov", "Vladislav V", "Vojtech Splichal", + "Volodya", + "Volodya Giro", + "Volodyachan", "Volodymyr Kuznetsov", "Vsevolod Orlov", "Vxider", @@ -1179,6 +1198,7 @@ const char * auto_contributors[] { "XenoAmess", "Xianda Ke", "Xiang Zhou", + "Xiaofei Hu", "Xin Wang", "Xoel Lopez Barata", "Xudong Zhang", @@ -1224,6 +1244,7 @@ const char * auto_contributors[] { "Zhipeng", "Zhuo Qiu", "Zijie Lu", + "Zimu Li", "Ziy1-Tan", "Zoran Pandovski", "[데이터플랫폼팀] 이호선", @@ -1490,6 +1511,7 @@ const char * auto_contributors[] { "jiyoungyoooo", "jktng", "jkuklis", + "joe09@foxmail.com", "joelynch", "johanngan", "johnnymatthews", @@ -1658,6 +1680,7 @@ const char * auto_contributors[] { "ongkong", "orantius", "p0ny", + "p1rattttt", "palasonicq", "palegre-tiny", "pawelsz-rb", @@ -1667,6 +1690,7 @@ const char * auto_contributors[] { "pedro.riera", "pengxiangcai", "peshkurov", + "pet74alex", "peter279k", "philip.han", "pingyu", @@ -1680,6 +1704,7 @@ const char * auto_contributors[] { "pyos", "pzhdfy", "qaziqarta", + "qiangxuhui", "qianlixiang", "qianmoQ", "qieqieplus", @@ -1793,6 +1818,7 @@ const char * auto_contributors[] { "unknown", "urgordeadbeef", "usurai", + "v01dxyz", "vahid-sohrabloo", "vdimir", "velavokr", @@ -1802,6 +1828,7 @@ const char * auto_contributors[] { "vic", "vicdashkov", "vicgao", + "vinay92-ch", "vinity", "vitac", "vitstn", @@ -1818,6 +1845,7 @@ const char * auto_contributors[] { "weeds085490", "whysage", "wineternity", + "woodlzm", "wuxiaobai24", "wxybear", "wzl", @@ -1877,6 +1905,7 @@ const char * auto_contributors[] { "zhenjial", "zhifeng", "zhongyuankai", + "zhou", "zhoubintao", "zhukai", "zimv", @@ -1891,6 +1920,7 @@ const char * auto_contributors[] { "zxealous", "zy-kkk", "zzsmdfj", + "zzyReal666", "Šimon Podlipský", "Александр", "Александр Нам", diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index 635686780a0..f96b839a322 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ 
b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -20,7 +20,7 @@ namespace fs = std::filesystem; namespace DB { -enum class Status +enum class Status : uint8_t { INACTIVE, ACTIVE, @@ -183,7 +183,10 @@ static void fillStatusColumns(MutableColumns & res_columns, size_t & col, { auto maybe_finished_status = finished_data_future.get(); if (maybe_finished_status.error == Coordination::Error::ZNONODE) - return fillStatusColumnsWithNulls(res_columns, col, Status::REMOVING); + { + fillStatusColumnsWithNulls(res_columns, col, Status::REMOVING); + return; + } /// asyncTryGet should throw on other error codes assert(maybe_finished_status.error == Coordination::Error::ZOK); @@ -199,9 +202,9 @@ static void fillStatusColumns(MutableColumns & res_columns, size_t & col, UInt64 query_finish_time_ms = maybe_finished_status.stat.ctime; /// query_finish_time - res_columns[col++]->insert(static_cast(query_finish_time_ms / 1000)); + res_columns[col++]->insert(query_finish_time_ms / 1000); /// query_duration_ms - res_columns[col++]->insert(static_cast(query_finish_time_ms - query_create_time_ms)); + res_columns[col++]->insert(query_finish_time_ms - query_create_time_ms); } diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 23d8fcfc481..9682fbc74a1 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2afc03d0e5e..093adc59cc6 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -219,7 +219,8 @@ private: void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2351c3c6a2a..1dbb187c418 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -102,7 +102,7 @@ static ColumnPtr getFilteredDatabases(const Databases & databases, const Actions void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const { const auto access = context->getAccess(); - const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); const auto databases = DatabaseCatalog::instance().getDatabases(); ColumnPtr filtered_databases_column = getFilteredDatabases(databases, predicate, context); @@ -111,7 +111,7 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c { auto database_name = filtered_databases_column->getDataAt(i).toString(); - if (check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name)) + if (need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name)) continue; if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) diff --git 
a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 31d566ef8b6..f48a8c67971 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -313,7 +313,8 @@ protected: void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) { const auto * predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 156fa5e5a9b..0836560dff0 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 629b11ee7f1..da700a7a4e9 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -1,17 +1,14 @@ #include #include -#include #include #include -#include #include #include #include -#include -#include #include + namespace DB { diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index b1ea2dd3f2b..175c0834bcb 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -263,7 +263,8 @@ ReadFromSystemPartsBase::ReadFromSystemPartsBase( void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) { const auto * predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index f34b0e0cfda..08cde25584b 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -65,6 +65,8 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_ {"column_data_uncompressed_bytes", std::make_shared(), "Total size of the decompressed data in the column, in bytes."}, {"column_marks_bytes", std::make_shared(), "The size of the marks for column, in bytes."}, {"column_modification_time", std::make_shared(std::make_shared()), "The last time the column was modified."}, + {"column_ttl_min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression of the column."}, + {"column_ttl_max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression of the column."}, {"serialization_kind", std::make_shared(), "Kind of serialization of a column"}, {"substreams", std::make_shared(std::make_shared()), "Names of substreams to which column is serialized"}, @@ -250,6 +252,21 @@ void StorageSystemPartsColumns::processNextStorage( else columns[res_index++]->insertDefault(); } + bool column_has_ttl = part->ttl_infos.columns_ttl.contains(column.name); + if (columns_mask[src_index++]) + { + if (column_has_ttl) + columns[res_index++]->insert(static_cast(part->ttl_infos.columns_ttl[column.name].min)); + else + columns[res_index++]->insertDefault(); + } + if 
(columns_mask[src_index++]) + { + if (column_has_ttl) + columns[res_index++]->insert(static_cast(part->ttl_infos.columns_ttl[column.name].max)); + else + columns[res_index++]->insertDefault(); + } auto serialization = part->getSerialization(column.name); if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 0a6e692f7e2..a5c496db7e7 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -62,6 +62,9 @@ private: /// Moves to the next disk in the list, if no more disks returns false bool nextDisk(); + /// Check if the path is a table path like "store/364/3643ff83-0996-4a4a-a90b-a96e66a10c74" + static bool isTablePath(const fs::path & path); + /// Returns full local path of the current file fs::path getCurrentPath() const { @@ -223,6 +226,19 @@ bool SystemRemoteDataPathsSource::nextDisk() return false; } +/// Check if the path is a table path like "store/364/3643ff83-0996-4a4a-a90b-a96e66a10c74" +bool SystemRemoteDataPathsSource::isTablePath(const fs::path & path) +{ + std::vector components; + for (auto it = path.begin(); it != path.end(); ++it) + components.push_back(it->string()); + + return components.size() == 3 + && components[0] == "store" + && components[1].size() == 3 /// "364" + && components[2].size() == 36; /// "3643ff83-0996-4a4a-a90b-a96e66a10c74" +} + bool SystemRemoteDataPathsSource::nextFile() { while (true) @@ -242,12 +258,12 @@ bool SystemRemoteDataPathsSource::nextFile() if (paths_stack.empty()) return false; + const auto current_path = getCurrentPath(); + try { const auto & disk = disks[current_disk].second; - const auto current_path = getCurrentPath(); - /// Files or directories can disappear due to concurrent operations if (!disk->exists(current_path)) continue; @@ -284,6 +300,11 @@ bool SystemRemoteDataPathsSource::nextFile() if (e.code() == std::errc::no_such_file_or_directory) continue; + /// Skip path if it's table path and we don't have permissions to read it + /// This can happen if the table is being dropped by first chmoding the directory to 000 + if (e.code() == std::errc::permission_denied && isTablePath(current_path)) + continue; + throw; } } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 5045dec3682..3bd5fd290db 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -141,6 +141,8 @@ public: if (thread_group) CurrentThread::attachToGroupIfDetached(thread_group); + setThreadName("SystemReplicas"); + try { ReplicatedTableStatus status; @@ -288,7 +290,8 @@ private: void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index a6bb7da2b6e..637182067f2 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -45,29 +45,27 @@ void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, co { for (const auto & [zookeeper_path, metadata] : 
S3QueueMetadataFactory::instance().getAll()) { - for (const auto & [file_name, file_status] : metadata->getFileStateses()) + for (const auto & [file_name, file_status] : metadata->getFileStatuses()) { size_t i = 0; res_columns[i++]->insert(zookeeper_path); res_columns[i++]->insert(file_name); - std::lock_guard lock(file_status->metadata_lock); - res_columns[i++]->insert(file_status->processed_rows.load()); - res_columns[i++]->insert(magic_enum::enum_name(file_status->state)); + res_columns[i++]->insert(magic_enum::enum_name(file_status->state.load())); if (file_status->processing_start_time) - res_columns[i++]->insert(file_status->processing_start_time); + res_columns[i++]->insert(file_status->processing_start_time.load()); else res_columns[i++]->insertDefault(); if (file_status->processing_end_time) - res_columns[i++]->insert(file_status->processing_end_time); + res_columns[i++]->insert(file_status->processing_end_time.load()); else res_columns[i++]->insertDefault(); ProfileEvents::dumpToMapColumn(file_status->profile_counters.getPartiallyAtomicSnapshot(), res_columns[i++].get(), true); - res_columns[i++]->insert(file_status->last_exception); + res_columns[i++]->insert(file_status->getException()); } } } diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 634089bd1cd..b67a8b23e9d 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -1,9 +1,7 @@ #include #include -#include #include -#include -#include +#include #include #include #include @@ -11,6 +9,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -76,14 +77,14 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C { fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File"); #if USE_AWS_S3 - fillDataImpl(res_columns, StorageS3::getSchemaCache(context), "S3"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageS3Configuration::type_name), "S3"); #endif #if USE_HDFS - fillDataImpl(res_columns, StorageHDFS::getSchemaCache(context), "HDFS"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageHDFSConfiguration::type_name), "HDFS"); #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureConfiguration::type_name), "Azure"); #endif } diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index 2092f352779..2e848f68850 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -23,7 +23,7 @@ namespace CurrentMetrics namespace DB { -enum class ChangeableWithoutRestart +enum class ChangeableWithoutRestart : uint8_t { No, IncreaseOnly, diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 9bd7ff945ad..783b899c978 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -146,7 +146,7 @@ ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr filter_by_engine = true; if (filter_by_engine) - 
engine_column= ColumnString::create(); + engine_column = ColumnString::create(); } for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) @@ -224,7 +224,7 @@ protected: MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); const auto access = context->getAccess(); - const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); size_t rows_count = 0; while (rows_count < max_block_size) @@ -348,7 +348,7 @@ protected: return Chunk(std::move(res_columns), num_rows); } - const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + const bool need_to_check_access_for_tables = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); if (!tables_it || !tables_it->isValid()) tables_it = database->getTablesIterator(context); @@ -361,7 +361,7 @@ protected: if (!tables.contains(table_name)) continue; - if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; StoragePtr table = nullptr; @@ -750,7 +750,8 @@ void StorageSystemTables::read( void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index ce2e153ea66..a48b109fbbe 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -6,6 +7,7 @@ #include #include + namespace DB { @@ -93,7 +95,7 @@ StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multith Pipe StorageSystemZeros::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo &, + SelectQueryInfo & query_info, ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, @@ -123,8 +125,13 @@ Pipe StorageSystemZeros::read( { auto source = std::make_shared(max_block_size, limit ? 
*limit : 0, state); - if (limit && i == 0) - source->addTotalRowsApprox(*limit); + if (i == 0) + { + if (limit) + source->addTotalRowsApprox(*limit); + else if (query_info.limit) + source->addTotalRowsApprox(query_info.limit); + } res.addSource(std::move(source)); } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 604e29df0ec..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -104,7 +104,7 @@ struct ZkNodeCache auto request = zkutil::makeSetRequest(path, value, -1); requests.push_back(request); } - for (auto [_, child] : children) + for (const auto & [_, child] : children) child->generateRequests(requests); } }; @@ -166,7 +166,7 @@ public: }; /// Type of path to be fetched -enum class ZkPathType +enum class ZkPathType : uint8_t { Exact, /// Fetch all nodes under this path Prefix, /// Fetch all nodes starting with this prefix, recursively (multiple paths may match prefix) @@ -474,7 +474,8 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont void ReadFromSystemZooKeeper::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(added_filter_nodes); + paths = extractPath(added_filter_nodes.nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } diff --git a/src/Storages/TTLMode.h b/src/Storages/TTLMode.h index 7f5fe0315c6..bbbdbee400a 100644 --- a/src/Storages/TTLMode.h +++ b/src/Storages/TTLMode.h @@ -4,7 +4,7 @@ namespace DB { -enum class TTLMode +enum class TTLMode : uint8_t { DELETE, MOVE, diff --git a/src/Storages/UVLoop.h b/src/Storages/UVLoop.h index dd1d64973d1..907a3fc0b13 100644 --- a/src/Storages/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -57,9 +57,9 @@ public: } } - inline uv_loop_t * getLoop() { return loop_ptr.get(); } + uv_loop_t * getLoop() { return loop_ptr.get(); } - inline const uv_loop_t * getLoop() const { return loop_ptr.get(); } + const uv_loop_t * getLoop() const { return loop_ptr.get(); } private: std::unique_ptr loop_ptr; diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp new file mode 100644 index 00000000000..ff73888e19d --- /dev/null +++ b/src/Storages/Utils.cpp @@ -0,0 +1,30 @@ +#include +#include + + +namespace CurrentMetrics +{ + extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; +} + + +namespace DB +{ + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) + { + if (storage->isView()) + { + return CurrentMetrics::AttachedView; + } + else if (storage->isDictionary()) + { + return CurrentMetrics::AttachedDictionary; + } + else + { + return CurrentMetrics::AttachedTable; + } + } +} diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h new file mode 100644 index 00000000000..c86c2a4c341 --- /dev/null +++ b/src/Storages/Utils.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + + +namespace DB +{ + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage); +} diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index e3cbff5f01b..cec55cefda2 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -219,7 +219,7 @@ void addRequestedPathFileAndSizeVirtualsToChunk( } } -static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allowed_inputs) 
+static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block * allowed_inputs) { std::stack nodes; nodes.push(node); @@ -228,7 +228,10 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo const auto * cur = nodes.top(); nodes.pop(); - if (cur->type == ActionsDAG::ActionType::INPUT && !allowed_inputs.has(cur->result_name)) + if (cur->type == ActionsDAG::ActionType::ARRAY_JOIN) + return false; + + if (cur->type == ActionsDAG::ActionType::INPUT && allowed_inputs && !allowed_inputs->has(cur->result_name)) return false; for (const auto * child : cur->children) @@ -336,7 +339,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( } } - if (allowed_inputs && !canEvaluateSubtree(node, *allowed_inputs)) + if (!canEvaluateSubtree(node, allowed_inputs)) return nullptr; return node; diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 04c26053dba..8bca1c97aad 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -297,7 +297,6 @@ namespace CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } class AddingAggregatedChunkInfoTransform : public ISimpleTransform @@ -920,7 +919,6 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) @@ -948,7 +946,6 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } void StorageWindowView::addFireSignal(std::set & signals) @@ -963,8 +960,7 @@ void StorageWindowView::addFireSignal(std::set & signals) void StorageWindowView::updateMaxTimestamp(UInt32 timestamp) { std::lock_guard lock(fire_signal_mutex); - if (timestamp > max_timestamp) - max_timestamp = timestamp; + max_timestamp = std::max(timestamp, max_timestamp); } void StorageWindowView::updateMaxWatermark(UInt32 watermark) @@ -1454,8 +1450,7 @@ void StorageWindowView::writeIntoWindowView( UInt32 watermark_lower_bound = addTime(t_max_watermark, window_view.slide_kind, -window_view.slide_num_units, *window_view.time_zone); - if (watermark_lower_bound < lateness_bound) - lateness_bound = watermark_lower_bound; + lateness_bound = std::min(watermark_lower_bound, lateness_bound); } } else if (!window_view.is_time_column_func_now) @@ -1551,10 +1546,7 @@ void StorageWindowView::writeIntoWindowView( const auto & timestamp_column = *block.getByName(window_view.timestamp_column_name).column; const auto & timestamp_data = typeid_cast(timestamp_column).getData(); for (const auto & timestamp : timestamp_data) - { - if (timestamp > block_max_timestamp) - block_max_timestamp = timestamp; - } + block_max_timestamp = std::max(timestamp, block_max_timestamp); } if (block_max_timestamp) diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 4f655f9b5e8..131712e750a 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -320,6 +320,8 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex auto replacement_map = visitor.getReplacementMap(); const auto & global_in_or_join_nodes = visitor.getGlobalInOrJoinNodes(); + QueryTreeNodePtrWithHashMap global_in_temporary_tables; + for (const auto & global_in_or_join_node : global_in_or_join_nodes) { if (auto * join_node = global_in_or_join_node.query_node->as()) @@ 
-364,15 +366,19 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION && in_function_node_type != QueryTreeNodeType::TABLE) continue; - auto subquery_to_execute = in_function_subquery_node; - if (subquery_to_execute->as()) - subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(subquery_to_execute, planner_context->getQueryContext()); + auto & temporary_table_expression_node = global_in_temporary_tables[in_function_subquery_node]; + if (!temporary_table_expression_node) + { + auto subquery_to_execute = in_function_subquery_node; + if (subquery_to_execute->as()) + subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(subquery_to_execute, planner_context->getQueryContext()); - auto temporary_table_expression_node = executeSubqueryNode(subquery_to_execute, - planner_context->getMutableQueryContext(), - global_in_or_join_node.subquery_depth); + temporary_table_expression_node = executeSubqueryNode(subquery_to_execute, + planner_context->getMutableQueryContext(), + global_in_or_join_node.subquery_depth); + } - in_function_subquery_node = std::move(temporary_table_expression_node); + replacement_map.emplace(in_function_subquery_node.get(), temporary_table_expression_node); } else { diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index cddfc9404d4..b4786b7313b 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -5,4 +5,4 @@ clickhouse_add_executable (merge_selector2 merge_selector2.cpp) target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) -target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils) +target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) diff --git a/src/Storages/examples/merge_selector.cpp b/src/Storages/examples/merge_selector.cpp index a3b0d8a29ef..a60d1ce402c 100644 --- a/src/Storages/examples/merge_selector.cpp +++ b/src/Storages/examples/merge_selector.cpp @@ -82,8 +82,7 @@ int main(int, char **) if (in_range) { sum_merged_size += parts[i].size; - if (parts[i].level > max_level) - max_level = parts[i].level; + max_level = std::max(parts[i].level, max_level); } if (parts[i].data == selected_parts.back().data) diff --git a/src/Storages/examples/merge_selector2.cpp b/src/Storages/examples/merge_selector2.cpp index 029da26fad6..49cbb892dda 100644 --- a/src/Storages/examples/merge_selector2.cpp +++ b/src/Storages/examples/merge_selector2.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include @@ -22,11 +21,8 @@ int main(int, char **) IMergeSelector::PartsRanges partitions(1); IMergeSelector::PartsRange & parts = partitions.back(); -/* SimpleMergeSelector::Settings settings; - SimpleMergeSelector selector(settings);*/ - - LevelMergeSelector::Settings settings; - LevelMergeSelector selector(settings); + SimpleMergeSelector::Settings settings; + SimpleMergeSelector selector(settings); ReadBufferFromFileDescriptor in(STDIN_FILENO); @@ -40,7 +36,7 @@ int main(int, char **) IMergeSelector::Part part; in >> part.size >> "\t" >> part.age >> "\t" >> part.level >> "\t" >> part_names.back() >> "\n"; part.data = part_names.back().data(); -// part.level = 0; + part.level = 0; 
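
[Editor's aside] The buildQueryTreeForShard hunk above deduplicates GLOBAL IN subqueries: instead of executing the same subquery into a temporary table once per occurrence, the executed result is cached per subquery node and reused through the replacement map. The following is only a minimal, self-contained sketch of that memoization pattern, using hypothetical Node / executeSubqueryIntoTemporaryTable stand-ins rather than the real query-tree types:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct Node { std::string description; };
    using NodePtr = std::shared_ptr<Node>;

    /// Stand-in for the expensive step: executing the subquery and materializing it
    /// as a temporary-table expression.
    NodePtr executeSubqueryIntoTemporaryTable(const NodePtr & subquery)
    {
        std::cout << "executing " << subquery->description << '\n';
        return std::make_shared<Node>(Node{"tmp(" + subquery->description + ")"});
    }

    int main()
    {
        auto subquery = std::make_shared<Node>(Node{"SELECT id FROM t"});
        std::vector<NodePtr> global_in_subqueries{subquery, subquery}; /// the same subquery occurs twice

        /// The patch keys its cache with QueryTreeNodePtrWithHashMap (node-hash based);
        /// a plain pointer-keyed map is used here only to keep the sketch self-contained.
        std::unordered_map<NodePtr, NodePtr> temporary_tables;
        std::unordered_map<const Node *, NodePtr> replacement_map;

        for (const auto & in_subquery : global_in_subqueries)
        {
            auto & temporary_table = temporary_tables[in_subquery];
            if (!temporary_table) /// executed only on the first occurrence
                temporary_table = executeSubqueryIntoTemporaryTable(in_subquery);
            replacement_map.emplace(in_subquery.get(), temporary_table);
        }

        std::cout << "subqueries executed: " << temporary_tables.size() << '\n'; /// prints 1
    }
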
parts.emplace_back(part); sum_parts_size += part.size; } @@ -88,8 +84,7 @@ int main(int, char **) if (in_range) { sum_merged_size += parts[i].size; - if (parts[i].level > max_level) - max_level = parts[i].level; + max_level = std::max(parts[i].level, max_level); } if (parts[i].data == selected_parts.back().data) diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 26e953c0578..6ea7bdc312d 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -210,7 +210,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( auto type_name = type_col[i].get(); auto storage_column = storage_columns.tryGetPhysical(name); - if (storage_column && storage_column->type->hasDynamicSubcolumns()) + if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name))); } } diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index dea9feaf28b..47542b7b47e 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -25,14 +25,13 @@ void registerStorageLiveView(StorageFactory & factory); void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); +void registerStorageLoop(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif #if USE_AWS_S3 void registerStorageS3(StorageFactory & factory); -void registerStorageCOS(StorageFactory & factory); -void registerStorageOSS(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); @@ -45,8 +44,6 @@ void registerStorageIceberg(StorageFactory & factory); #endif #if USE_HDFS -void registerStorageHDFS(StorageFactory & factory); - #if USE_HIVE void registerStorageHive(StorageFactory & factory); #endif @@ -99,9 +96,7 @@ void registerStorageSQLite(StorageFactory & factory); void registerStorageKeeperMap(StorageFactory & factory); -#if USE_AZURE_BLOB_STORAGE -void registerStorageAzureBlob(StorageFactory & factory); -#endif +void registerStorageObjectStorage(StorageFactory & factory); void registerStorages() { @@ -126,14 +121,12 @@ void registerStorages() registerStorageGenerateRandom(factory); registerStorageExecutable(factory); registerStorageWindowView(factory); + registerStorageLoop(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif #if USE_AWS_S3 - registerStorageS3(factory); - registerStorageCOS(factory); - registerStorageOSS(factory); registerStorageHudi(factory); registerStorageS3Queue(factory); @@ -148,12 +141,9 @@ void registerStorages() #endif #if USE_HDFS - registerStorageHDFS(factory); - #if USE_HIVE registerStorageHive(factory); #endif - #endif registerStorageODBC(factory); @@ -201,9 +191,7 @@ void registerStorages() registerStorageKeeperMap(factory); - #if USE_AZURE_BLOB_STORAGE - registerStorageAzureBlob(factory); - #endif + registerStorageObjectStorage(factory); } } diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 1946d8e8905..ed7f80e5df9 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -39,7 +39,7 @@ class Context; class ITableFunction : public std::enable_shared_from_this { public: - static inline std::string 
getDatabaseName() { return "_table_function"; } + static std::string getDatabaseName() { return "_table_function"; } /// Get the main function name. virtual std::string getName() const = 0; diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index 9f56d781bc9..28dc43f350b 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -1,13 +1,10 @@ #pragma once -#include "config.h" - #include #include #include #include -#include -#include +#include namespace DB diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 91165ba6705..fe6e5b3e593 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -1,15 +1,16 @@ #pragma once #include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include -#if USE_AWS_S3 - -# include -# include -# include -# include -# include -# include namespace DB { @@ -23,44 +24,76 @@ public: protected: StoragePtr executeImpl( - const ASTPtr & /*ast_function*/, + const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/, + ColumnsDescription cached_columns, bool /*is_insert_query*/) const override { ColumnsDescription columns; - if (TableFunction::configuration.structure != "auto") - columns = parseColumnsListFromString(TableFunction::configuration.structure, context); + auto configuration = TableFunction::getConfiguration(); + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!cached_columns.empty()) + columns = cached_columns; StoragePtr storage = Storage::create( - TableFunction::configuration, context, LoadingStrictnessLevel::CREATE, StorageID(TableFunction::getDatabaseName(), table_name), - columns, ConstraintsDescription{}, String{}, std::nullopt); + configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), + columns, ConstraintsDescription{}, String{}, std::nullopt, LoadingStrictnessLevel::CREATE); storage->startup(); return storage; } - const char * getStorageTypeName() const override { return Storage::name; } + const char * getStorageTypeName() const override { return name; } - ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override { - if (TableFunction::configuration.structure == "auto") + auto configuration = TableFunction::getConfiguration(); + if (configuration->structure == "auto") { context->checkAccess(TableFunction::getSourceAccessType()); - return Storage::getTableStructureFromData(TableFunction::configuration, std::nullopt, context); + auto object_storage = TableFunction::getObjectStorage(context, !is_insert_query); + return Storage::getTableStructureFromData(object_storage, configuration, std::nullopt, context); + } + else + { + return parseColumnsListFromString(configuration->structure, context); } - - return parseColumnsListFromString(TableFunction::configuration.structure, context); } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override { + auto configuration = TableFunction::getConfiguration(); + configuration->format = "Parquet"; /// Set default format to Parquet if it's not specified in arguments. 
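
[Editor's aside] In the reworked ITableFunctionDataLake above, executeImpl and getActualTableStructure resolve the table structure in a fixed order: an explicit structure argument, then cached_columns supplied by the caller, then schema inference from the data (with the format defaulting to Parquet). A rough, self-contained sketch of that precedence follows; the types and helpers are simplified stand-ins, not ColumnsDescription or the real parsing/inference functions:

    #include <iostream>
    #include <string>
    #include <vector>

    using Columns = std::vector<std::string>;

    /// Trivial stand-ins for parseColumnsListFromString and for schema inference over the data.
    Columns parseColumnsList(const std::string & structure) { return {structure}; }
    Columns inferColumnsFromData() { return {"<schema inferred from files>"}; }

    /// Precedence sketched from the hunk above: an explicit `structure` argument wins,
    /// then columns cached by the caller (cached_columns), and only as a last resort
    /// the schema is inferred from the data itself.
    Columns resolveColumns(const std::string & structure, const Columns & cached_columns)
    {
        if (structure != "auto")
            return parseColumnsList(structure);
        if (!cached_columns.empty())
            return cached_columns;
        return inferColumnsFromData();
    }

    int main()
    {
        for (const auto & column : resolveColumns("auto", {"id UInt64", "name String"}))
            std::cout << column << '\n'; /// prints the cached columns
    }
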
- TableFunction::configuration.format = "Parquet"; TableFunction::parseArguments(ast_function, context); } }; -} +struct TableFunctionIcebergName +{ + static constexpr auto name = "iceberg"; +}; + +struct TableFunctionDeltaLakeName +{ + static constexpr auto name = "deltaLake"; +}; + +struct TableFunctionHudiName +{ + static constexpr auto name = "hudi"; +}; + +#if USE_AWS_S3 +#if USE_AVRO +using TableFunctionIceberg = ITableFunctionDataLake; #endif +#if USE_PARQUET +using TableFunctionDeltaLake = ITableFunctionDataLake; +#endif +using TableFunctionHudi = ITableFunctionDataLake; +#endif + +} diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp deleted file mode 100644 index 275cd2a9cbb..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ /dev/null @@ -1,395 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) -{ - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - } - else - { - if (engine_args.size() < 3 || engine_args.size() > 8) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; - - if (engine_args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - configuration.structure = fourth_arg; - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if 
(is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); - if (is_format_arg(sixth_arg)) - configuration.format = sixth_arg; - else - configuration.structure = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - else if (engine_args.size() == 8) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - } -} - -void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) -{ - if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pairs "format='...', structure='...'" - /// at the end of arguments to override existed format and structure with "auto" values. 
- if (collection->getOrDefault("format", "auto") == "auto") - { - ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; - auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); - args.push_back(format_equal_func); - } - if (collection->getOrDefault("structure", "auto") == "auto") - { - ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); - args.push_back(structure_equal_func); - } - } - else - { - if (args.size() < 3 || args.size() > 8) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - auto format_literal = std::make_shared(format); - auto structure_literal = std::make_shared(structure); - - for (auto & arg : args) - arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; - - /// (connection_string, container_name, blobpath) - if (args.size() == 3) - { - args.push_back(format_literal); - /// Add compression = "auto" before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// (connection_string, container_name, blobpath, structure) or - /// (connection_string, container_name, blobpath, format) - /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. - else if (args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); - /// (..., format) -> (..., format, compression, structure) - if (is_format_arg(fourth_arg)) - { - if (fourth_arg == "auto") - args[3] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// (..., structure) -> (..., format, compression, structure) - else - { - auto structure_arg = args.back(); - args[3] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - if (fourth_arg == "auto") - args.push_back(structure_literal); - else - args.push_back(structure_arg); - } - } - /// (connection_string, container_name, blobpath, format, compression) or - /// (storage_account_url, container_name, blobpath, account_name, account_key) - /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. - else if (args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - /// (..., format, compression) -> (..., format, compression, structure) - if (is_format_arg(fourth_arg)) - { - if (fourth_arg == "auto") - args[3] = format_literal; - args.push_back(structure_literal); - } - /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) - else - { - args.push_back(format_literal); - /// Add compression=auto before structure argument. 
- args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - } - /// (connection_string, container_name, blobpath, format, compression, structure) or - /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or - /// (storage_account_url, container_name, blobpath, account_name, account_key, format) - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); - - /// (..., format, compression, structure) - if (is_format_arg(fourth_arg)) - { - if (fourth_arg == "auto") - args[3] = format_literal; - if (checkAndGetLiteralArgument(args[5], "structure") == "auto") - args[5] = structure_literal; - } - /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) - else if (is_format_arg(sixth_arg)) - { - if (sixth_arg == "auto") - args[5] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) - else - { - auto structure_arg = args.back(); - args[5] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - if (sixth_arg == "auto") - args.push_back(structure_literal); - else - args.push_back(structure_arg); - } - } - /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) - else if (args.size() == 7) - { - /// (..., format, compression) -> (..., format, compression, structure) - if (checkAndGetLiteralArgument(args[5], "format") == "auto") - args[5] = format_literal; - args.push_back(structure_literal); - } - /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) - else if (args.size() == 8) - { - if (checkAndGetLiteralArgument(args[5], "format") == "auto") - args[5] = format_literal; - if (checkAndGetLiteralArgument(args[7], "structure") == "auto") - args[7] = structure_literal; - } - } -} - -ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); - if (configuration.format == "auto") - return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBeforeUsingStructureHint() const -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - -StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & 
/*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - - StoragePtr storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function Azure - std::nullopt, - /* distributed_processing */ false, - nullptr); - - storage->startup(); - - return storage; -} - -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", - .examples{{"azureBlobStorage", "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h deleted file mode 100644 index 9622881b417..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - - -namespace DB -{ - -class Context; - -/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob. 
- */ -class TableFunctionAzureBlobStorage : public ITableFunction -{ -public: - static constexpr auto name = "azureBlobStorage"; - - static constexpr auto signature = " - connection_string, container_name, blobpath\n" - " - connection_string, container_name, blobpath, structure \n" - " - connection_string, container_name, blobpath, format \n" - " - connection_string, container_name, blobpath, format, compression \n" - " - connection_string, container_name, blobpath, format, compression, structure \n" - " - storage_account_url, container_name, blobpath, account_name, account_key\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; - - static size_t getMaxNumberOfArguments() { return 8; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "Azure"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable StorageAzureBlob::Configuration configuration; - ColumnsDescription structure_hint; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp deleted file mode 100644 index 04dddca7672..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - StoragePtr storage; - ColumnsDescription columns; - - if (configuration.structure != "auto") - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - if (context->getClientInfo().query_kind == 
ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs - storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings - /* distributed_processing */ true, - /*partition_by_=*/nullptr); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false} - ); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h deleted file mode 100644 index 58f79328f63..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) - * A table function, which allows to process many files from Azure Blob Storage on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in Azure Blob Storage file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. 
- */ -class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "azureBlobStorageCluster"; - static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp deleted file mode 100644 index b8bf810f6fa..00000000000 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_PARQUET - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionDeltaLakeName -{ - static constexpr auto name = "deltaLake"; -}; - -using TableFunctionDeltaLake = ITableFunctionDataLake; - -void registerTableFunctionDeltaLake(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation = { - .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", - .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp deleted file mode 100644 index 45829245551..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "config.h" -#include "registerTableFunctions.h" - -#if USE_HDFS -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -StoragePtr TableFunctionHDFS::getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const -{ - return std::make_shared( - source, - StorageID(getDatabaseName(), table_name), - format_, - columns, - ConstraintsDescription{}, - String{}, - global_context, - compression_method_); -} - -ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (structure == "auto") - { - context->checkAccess(getSourceAccessType()); - if (format == "auto") - return StorageHDFS::getTableStructureAndFormatFromData(filename, compression_method, context).first; - return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); - } - - return parseColumnsListFromString(structure, context); -} - -void registerTableFunctionHDFS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} -#endif diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h deleted file mode 100644 index f1c0b8a7eae..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include - - -namespace DB -{ - -class Context; - -/* hdfs(URI, [format, structure, compression]) - creates a temporary storage from hdfs 
files - * - */ -class TableFunctionHDFS : public ITableFunctionFileLike -{ -public: - static constexpr auto name = "hdfs"; - static constexpr auto signature = " - uri\n" - " - uri, format\n" - " - uri, format, structure\n" - " - uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - -private: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - const char * getStorageTypeName() const override { return "HDFS"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp deleted file mode 100644 index 57ce6d2b9ff..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionHDFSCluster::getStorage( - const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context, - const std::string & table_name, const String & /*compression_method_*/) const -{ - StoragePtr storage; - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this uri won't contains globs - storage = std::make_shared( - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - String{}, - context, - compression_method, - /*distributed_processing=*/true, - nullptr); - } - else - { - storage = std::make_shared( - context, - cluster_name, - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - compression_method); - } - return storage; -} - -void registerTableFunctionHDFSCluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h deleted file mode 100644 index 0253217feb7..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * hdfsCluster(cluster, URI, format, structure, compression_method) - * A table function, which allows to process many files from HDFS on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in HDFS file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. 
- */ -class TableFunctionHDFSCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "hdfsCluster"; - static constexpr auto signature = " - cluster_name, uri\n" - " - cluster_name, uri, format\n" - " - cluster_name, uri, format, structure\n" - " - cluster_name, uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - - const char * getStorageTypeName() const override { return "HDFSCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp deleted file mode 100644 index 436e708b72d..00000000000 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionHudiName -{ - static constexpr auto name = "hudi"; -}; -using TableFunctionHudi = ITableFunctionDataLake; - -void registerTableFunctionHudi(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", - .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} -} - -#endif diff --git a/src/TableFunctions/TableFunctionIceberg.cpp b/src/TableFunctions/TableFunctionIceberg.cpp deleted file mode 100644 index d37aace01c6..00000000000 --- a/src/TableFunctions/TableFunctionIceberg.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO - -#include -#include -#include -#include -#include "registerTableFunctions.h" - - -namespace DB -{ - -struct TableFunctionIcebergName -{ - static constexpr auto name = "iceberg"; -}; - -using TableFunctionIceberg = ITableFunctionDataLake; - -void registerTableFunctionIceberg(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Iceberg table stored on object store.)", - .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionLoop.cpp b/src/TableFunctions/TableFunctionLoop.cpp new file mode 100644 index 00000000000..43f122f6cb3 --- /dev/null +++ b/src/TableFunctions/TableFunctionLoop.cpp @@ -0,0 +1,155 @@ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registerTableFunctions.h" + +namespace DB +{ + namespace ErrorCodes + { + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int UNKNOWN_TABLE; + } + namespace + { + class TableFunctionLoop : public ITableFunction + { + public: + static constexpr auto name = "loop"; + std::string getName() const override { return name; } + private: + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; + const char * 
getStorageTypeName() const override { return "Loop"; } + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + // save the inner table function AST + ASTPtr inner_table_function_ast; + // save database and table + std::string loop_database_name; + std::string loop_table_name; + }; + + } + + void TableFunctionLoop::parseArguments(const ASTPtr & ast_function, ContextPtr context) + { + const auto & args_func = ast_function->as(); + + if (!args_func.arguments) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function 'loop' must have arguments."); + + auto & args = args_func.arguments->children; + if (args.empty()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "No arguments provided for table function 'loop'"); + + if (args.size() == 1) + { + if (const auto * id = args[0]->as()) + { + String id_name = id->name(); + + size_t dot_pos = id_name.find('.'); + if (id_name.find('.', dot_pos + 1) != String::npos) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "There are more than one dot"); + if (dot_pos != String::npos) + { + loop_database_name = id_name.substr(0, dot_pos); + loop_table_name = id_name.substr(dot_pos + 1); + } + else + { + loop_table_name = id_name; + } + } + else if (const auto * func = args[0]->as()) + { + inner_table_function_ast = args[0]; + } + else + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected identifier or function for argument 1 of function 'loop', got {}", args[0]->getID()); + } + } + // loop(database, table) + else if (args.size() == 2) + { + args[0] = evaluateConstantExpressionForDatabaseName(args[0], context); + args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); + + loop_database_name = checkAndGetLiteralArgument(args[0], "database"); + loop_table_name = checkAndGetLiteralArgument(args[1], "table"); + } + else + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function 'loop' must have 1 or 2 arguments."); + } + } + + ColumnsDescription TableFunctionLoop::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const + { + return ColumnsDescription(); + } + + StoragePtr TableFunctionLoop::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const + { + StoragePtr storage; + if (!inner_table_function_ast) + { + String database_name = loop_database_name; + if (database_name.empty()) + database_name = context->getCurrentDatabase(); + + auto database = DatabaseCatalog::instance().getDatabase(database_name); + storage = database->tryGetTable(loop_table_name, context); + if (!storage) + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table '{}' not found in database '{}'", loop_table_name, database_name); + } + else + { + auto inner_table_function = TableFunctionFactory::instance().get(inner_table_function_ast, context); + storage = inner_table_function->execute( + inner_table_function_ast, + context, + table_name, + std::move(cached_columns), + is_insert_query); + } + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), + storage + ); + res->startup(); + return res; + } + + void registerTableFunctionLoop(TableFunctionFactory & factory) + { + factory.registerFunction( + {.documentation + = {.description=R"(The table function can be used to continuously output query results in an infinite 
loop.)", + .examples{{"loop", "SELECT * FROM loop((numbers(3)) LIMIT 7", "0" + "1" + "2" + "0" + "1" + "2" + "0"}} + }}); + } + +} diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp new file mode 100644 index 00000000000..550d9cc799b --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -0,0 +1,226 @@ +#include "config.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +ObjectStoragePtr TableFunctionObjectStorage::getObjectStorage(const ContextPtr & context, bool create_readonly) const +{ + if (!object_storage) + object_storage = configuration->createObjectStorage(context, create_readonly); + return object_storage; +} + +template +StorageObjectStorage::ConfigurationPtr TableFunctionObjectStorage::getConfiguration() const +{ + if (!configuration) + configuration = std::make_shared(); + return configuration; +} + +template +std::vector TableFunctionObjectStorage::skipAnalysisForArguments( + const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +{ + auto & table_function_node = query_node_table_function->as(); + auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); + size_t table_function_arguments_size = table_function_arguments_nodes.size(); + + std::vector result; + for (size_t i = 0; i < table_function_arguments_size; ++i) + { + auto * function_node = table_function_arguments_nodes[i]->as(); + if (function_node && function_node->getFunctionName() == "headers") + result.push_back(i); + } + return result; +} + +template +void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + /// Clone ast function, because we can modify its arguments like removing headers. 
+ auto ast_copy = ast_function->clone(); + ASTs & args_func = ast_copy->children; + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); + + auto & args = args_func.at(0)->children; + parseArgumentsImpl(args, context); +} + +template +ColumnsDescription TableFunctionObjectStorage< + Definition, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const +{ + if (configuration->structure == "auto") + { + context->checkAccess(getSourceAccessType()); + ColumnsDescription columns; + auto storage = getObjectStorage(context, !is_insert_query); + resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); + return columns; + } + else + return parseColumnsListFromString(configuration->structure, context); +} + +template +StoragePtr TableFunctionObjectStorage::executeImpl( + const ASTPtr & /* ast_function */, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const +{ + ColumnsDescription columns; + chassert(configuration); + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; + + StoragePtr storage = std::make_shared( + configuration, + getObjectStorage(context, !is_insert_query), + context, + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + String{}, + /* format_settings */std::nullopt, + /* distributed_processing */false, + nullptr); + + storage->startup(); + return storage; +} + +void registerTableFunctionObjectStorage(TableFunctionFactory & factory) +{ + UNUSED(factory); +#if USE_AWS_S3 + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on AWS S3.)", + .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on GCS.)", + .examples{{"gcs", "SELECT * FROM gcs(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on COSN.)", + .examples{{"cosn", "SELECT * FROM cosn(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on OSS.)", + .examples{{"oss", "SELECT * FROM oss(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); +#endif + +#if USE_AZURE_BLOB_STORAGE + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", + .examples{ + { + "azureBlobStorage", + "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", "" + }} + }, + .allow_readonly = false + }); +#endif +#if USE_HDFS + factory.registerFunction>( + { + .documentation = + { + 
.description=R"(The table function can be used to read the data stored on HDFS virtual filesystem.)", + .examples{ + { + "hdfs", + "SELECT * FROM hdfs(url, format, compression, structure])", "" + }} + }, + .allow_readonly = false + }); +#endif +} + +#if USE_AZURE_BLOB_STORAGE +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_AWS_S3 +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_HDFS +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +} diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h new file mode 100644 index 00000000000..86b8f0d5e14 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -0,0 +1,172 @@ +#pragma once + +#include "config.h" +#include +#include +#include +#include +#include + +namespace DB +{ + +class Context; +class StorageS3Configuration; +class StorageAzureConfiguration; +class StorageHDFSConfiguration; +struct S3StorageSettings; +struct AzureStorageSettings; +struct HDFSStorageSettings; + +struct AzureDefinition +{ + static constexpr auto name = "azureBlobStorage"; + static constexpr auto storage_type_name = "Azure"; + static constexpr auto signature = " - connection_string, container_name, blobpath\n" + " - connection_string, container_name, blobpath, structure \n" + " - connection_string, container_name, blobpath, format \n" + " - connection_string, container_name, blobpath, format, compression \n" + " - connection_string, container_name, blobpath, format, compression, structure \n" + " - storage_account_url, container_name, blobpath, account_name, account_key\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; + static constexpr auto max_number_of_arguments = 8; +}; + +struct S3Definition +{ + static constexpr auto name = "s3"; + static constexpr auto storage_type_name = "S3"; + static constexpr auto signature = " - url\n" + " - url, format\n" + " - url, format, structure\n" + " - url, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key\n" + " - url, access_key_id, secret_access_key, session_token\n" + " - url, access_key_id, secret_access_key, format\n" + " - url, access_key_id, secret_access_key, session_token, format\n" + " - url, access_key_id, secret_access_key, format, structure\n" + " - url, access_key_id, secret_access_key, session_token, format, structure\n" + " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + static constexpr auto max_number_of_arguments = 8; +}; + +struct GCSDefinition +{ + static constexpr auto name = "gcs"; + static constexpr auto storage_type_name = "GCS"; + static constexpr auto signature = S3Definition::signature; + static constexpr auto 
max_number_of_arguments = S3Definition::max_number_of_arguments; +}; + +struct COSNDefinition +{ + static constexpr auto name = "cosn"; + static constexpr auto storage_type_name = "COSN"; + static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; +}; + +struct OSSDefinition +{ + static constexpr auto name = "oss"; + static constexpr auto storage_type_name = "OSS"; + static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; +}; + +struct HDFSDefinition +{ + static constexpr auto name = "hdfs"; + static constexpr auto storage_type_name = "HDFS"; + static constexpr auto signature = " - uri\n" + " - uri, format\n" + " - uri, format, structure\n" + " - uri, format, structure, compression_method\n"; + static constexpr auto max_number_of_arguments = 4; +}; + +template +class TableFunctionObjectStorage : public ITableFunction +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + static size_t getMaxNumberOfArguments() { return Definition::max_number_of_arguments; } + + String getName() const override { return name; } + + virtual String getSignature() const { return signature; } + + bool hasStaticStructure() const override { return configuration->structure != "auto"; } + + bool needStructureHint() const override { return configuration->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override + { + return configuration->format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); + } + + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); + } + + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context) + { + StorageObjectStorage::Configuration::initialize(*getConfiguration(), args, context, true); + } + + static void updateStructureAndFormatArgumentsIfNeeded( + ASTs & args, + const String & structure, + const String & format, + const ContextPtr & context) + { + Configuration().addStructureAndFormatToArgs(args, structure, format, context); + } + +protected: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; + ConfigurationPtr getConfiguration() const; + + mutable ConfigurationPtr configuration; + mutable ObjectStoragePtr object_storage; + ColumnsDescription structure_hint; + + std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; +}; + +#if USE_AWS_S3 +using TableFunctionS3 = TableFunctionObjectStorage; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlob = 
TableFunctionObjectStorage; +#endif + +#if USE_HDFS +using TableFunctionHDFS = TableFunctionObjectStorage; +#endif +} diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp new file mode 100644 index 00000000000..449bd2c8c49 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -0,0 +1,118 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +template +StoragePtr TableFunctionObjectStorageCluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const +{ + auto configuration = Base::getConfiguration(); + + ColumnsDescription columns; + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!Base::structure_hint.empty()) + columns = Base::structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; + + auto object_storage = Base::getObjectStorage(context, !is_insert_query); + StoragePtr storage; + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + storage = std::make_shared( + configuration, + object_storage, + context, + StorageID(Base::getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + /* comment */String{}, + /* format_settings */std::nullopt, /// No format_settings + /* distributed_processing */true, + /*partition_by_=*/nullptr); + } + else + { + storage = std::make_shared( + ITableFunctionCluster::cluster_name, + configuration, + object_storage, + StorageID(Base::getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + context); + } + + storage->startup(); + return storage; +} + + +void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) +{ +#if USE_AWS_S3 + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on S3 in parallel for many nodes in a specified cluster.)", + .examples{{"s3Cluster", "SELECT * FROM s3Cluster(cluster, url, format, structure)", ""}}}, + .allow_readonly = false + } + ); +#endif + +#if USE_AZURE_BLOB_STORAGE + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", + .examples{{ + "azureBlobStorageCluster", + "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", ""}}}, + .allow_readonly = false + } + ); +#endif + +#if USE_HDFS + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on HDFS in parallel for many nodes in a specified cluster.)", + .examples{{"HDFSCluster", "SELECT * FROM HDFSCluster(cluster_name, uri, format)", ""}}}, + .allow_readonly = false + } + ); +#endif + + UNUSED(factory); +} + +#if USE_AWS_S3 +template class TableFunctionObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +template class TableFunctionObjectStorageCluster; +#endif + +#if USE_HDFS +template class TableFunctionObjectStorageCluster; +#endif +} diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h 
b/src/TableFunctions/TableFunctionObjectStorageCluster.h new file mode 100644 index 00000000000..296791b8bda --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -0,0 +1,102 @@ +#pragma once +#include "config.h" +#include +#include +#include + + +namespace DB +{ + +class Context; + +class StorageS3Settings; +class StorageAzureBlobSettings; +class StorageS3Configuration; +class StorageAzureConfiguration; + +struct AzureClusterDefinition +{ + static constexpr auto name = "azureBlobStorageCluster"; + static constexpr auto storage_type_name = "AzureBlobStorageCluster"; + static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; + static constexpr auto max_number_of_arguments = AzureDefinition::max_number_of_arguments + 1; +}; + +struct S3ClusterDefinition +{ + static constexpr auto name = "s3Cluster"; + static constexpr auto storage_type_name = "S3Cluster"; + static constexpr auto signature = " - cluster, url\n" + " - cluster, url, format\n" + " - cluster, url, format, structure\n" + " - cluster, url, access_key_id, secret_access_key\n" + " - cluster, url, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, format\n" + " - cluster, url, access_key_id, secret_access_key, format, structure\n" + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments + 1; +}; + +struct HDFSClusterDefinition +{ + static constexpr auto name = "hdfsCluster"; + static constexpr auto storage_type_name = "HDFSCluster"; + static constexpr auto signature = " - cluster_name, uri\n" + " - cluster_name, uri, format\n" + " - cluster_name, uri, format, structure\n" + " - cluster_name, uri, format, structure, compression_method\n"; + static constexpr auto max_number_of_arguments = HDFSDefinition::max_number_of_arguments + 1; +}; + +/** +* Class implementing (s3/hdfs/azureBlobStorage)Cluster(...) table functions, +* which allow processing many files from S3/HDFS/Azure blob storage on a specific cluster. +* On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks +* in file path and dispatches each file dynamically. +* On worker node it asks initiator about next task to process, processes it. +* This is repeated until the tasks are finished.
+*/ +template +class TableFunctionObjectStorageCluster : public ITableFunctionCluster> +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + String getName() const override { return name; } + String getSignature() const override { return signature; } + +protected: + using Base = TableFunctionObjectStorage; + + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + bool hasStaticStructure() const override { return Base::getConfiguration()->structure != "auto"; } + + bool needStructureHint() const override { return Base::getConfiguration()->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { Base::structure_hint = structure_hint_; } +}; + +#if USE_AWS_S3 +using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_HDFS +using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; +#endif +} diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp deleted file mode 100644 index a8c100ebd44..00000000000 --- a/src/TableFunctions/TableFunctionS3.cpp +++ /dev/null @@ -1,506 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - - -std::vector TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const -{ - auto & table_function_node = query_node_table_function->as(); - auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); - size_t table_function_arguments_size = table_function_arguments_nodes.size(); - - std::vector result; - - for (size_t i = 0; i < table_function_arguments_size; ++i) - { - auto * function_node = table_function_arguments_nodes[i]->as(); - if (function_node && function_node->getFunctionName() == "headers") - result.push_back(i); - } - - return result; -} - -/// This is needed to avoid copy-paste. 
Because s3Cluster arguments only differ in additional argument (first) - cluster name -void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) - { - StorageS3::processNamedCollectionResult(configuration, *named_collection); - if (configuration.format == "auto") - { - String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(file_path).value_or("auto"); - } - } - else - { - size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); - - if (count == 0 || count > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - - std::unordered_map args_to_idx; - - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 3 possible variants: - /// - s3(source, format, structure) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - args_to_idx = {{"format", 1}, {"structure", 2}}; - else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 4 possible variants: - /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, secret_access_key, format), - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. 
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - { - args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - } - /// For 5 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure) - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, - /// and by the 4-th argument, check if it's a format name or not - else if (count == 5) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - } - } - // For 6 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) - /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) - /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not - else if (count == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; - } - } - else if (count == 7) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; - } - - /// This argument is always the first - String url = checkAndGetLiteralArgument(args[0], "url"); - configuration.url = S3::URI(url); - - if (args_to_idx.contains("format")) - { - auto format = checkAndGetLiteralArgument(args[args_to_idx["format"]], "format"); - /// Set format to configuration only of it's not 'auto', - /// because we can have default format set in configuration. 
- if (format != "auto") - configuration.format = format; - } - - if (args_to_idx.contains("structure")) - configuration.structure = checkAndGetLiteralArgument(args[args_to_idx["structure"]], "structure"); - - if (args_to_idx.contains("compression_method")) - configuration.compression_method = checkAndGetLiteralArgument(args[args_to_idx["compression_method"]], "compression_method"); - - if (args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(args[args_to_idx["access_key_id"]], "access_key_id"); - - if (args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); - - if (args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = checkAndGetLiteralArgument(args[args_to_idx["session_token"]], "session_token"); - - configuration.auth_settings.no_sign_request = no_sign_request; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(url).getPath()).value_or("auto"); - } - - configuration.keys = {configuration.url.key}; -} - -void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - /// Parse args - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) -{ - if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pairs "format='...', structure='...'" - /// at the end of arguments to override existed format and structure with "auto" values. - if (collection->getOrDefault("format", "auto") == "auto") - { - ASTs format_equal_func_args = {std::make_shared("format"), std::make_shared(format)}; - auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); - args.push_back(format_equal_func); - } - if (collection->getOrDefault("structure", "auto") == "auto") - { - ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); - args.push_back(structure_equal_func); - } - } - else - { - HTTPHeaderEntries tmp_headers; - size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); - - if (count == 0 || count > getMaxNumberOfArguments()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); - - auto format_literal = std::make_shared(format); - auto structure_literal = std::make_shared(structure); - - /// s3(s3_url) -> s3(s3_url, format, structure) - if (count == 1) - { - args.push_back(format_literal); - args.push_back(structure_literal); - } - /// s3(s3_url, format) -> s3(s3_url, format, structure) or - /// s3(s3_url, NOSIGN) -> s3(s3_url, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. 
- else if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - args.push_back(format_literal); - else if (second_arg == "auto") - args.back() = format_literal; - args.push_back(structure_literal); - } - /// s3(source, format, structure) or - /// s3(source, access_key_id, secret_access_key) or - /// s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// s3(source, NOSIGN, format) -> s3(source, NOSIGN, format, structure) - if (boost::iequals(second_arg, "NOSIGN")) - { - if (checkAndGetLiteralArgument(args[2], "format") == "auto") - args.back() = format_literal; - args.push_back(structure_literal); - } - /// s3(source, format, structure) - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - { - if (second_arg == "auto") - args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") - args[2] = structure_literal; - } - /// s3(source, access_key_id, access_key_id) -> s3(source, access_key_id, access_key_id, format, structure) - else - { - args.push_back(format_literal); - args.push_back(structure_literal); - } - } - /// s3(source, format, structure, compression_method) or - /// s3(source, access_key_id, secret_access_key, format) or - /// s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// s3(source, NOSIGN, format, structure) - if (boost::iequals(second_arg, "NOSIGN")) - { - if (checkAndGetLiteralArgument(args[2], "format") == "auto") - args[2] = format_literal; - if (checkAndGetLiteralArgument(args[3], "structure") == "auto") - args[3] = structure_literal; - } - /// s3(source, format, structure, compression_method) - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - { - if (second_arg == "auto") - args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") - args[2] = structure_literal; - } - /// s3(source, access_key_id, access_key_id, format) -> s3(source, access_key_id, access_key_id, format, structure) - else - { - if (checkAndGetLiteralArgument(args[3], "format") == "auto") - args[3] = format_literal; - args.push_back(structure_literal); - } - } - /// s3(source, access_key_id, secret_access_key, format, structure) or - /// s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. 
- else if (count == 5) - { - auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// s3(source, NOSIGN, format, structure, compression_method) - if (boost::iequals(sedond_arg, "NOSIGN")) - { - if (checkAndGetLiteralArgument(args[2], "format") == "auto") - args[2] = format_literal; - if (checkAndGetLiteralArgument(args[3], "structure") == "auto") - args[3] = structure_literal; - } - /// s3(source, access_key_id, access_key_id, format, structure) - else - { - if (checkAndGetLiteralArgument(args[3], "format") == "auto") - args[3] = format_literal; - if (checkAndGetLiteralArgument(args[4], "structure") == "auto") - args[4] = structure_literal; - } - } - /// s3(source, access_key_id, secret_access_key, format, structure, compression) - else if (count == 6) - { - if (checkAndGetLiteralArgument(args[3], "format") == "auto") - args[3] = format_literal; - if (checkAndGetLiteralArgument(args[4], "structure") == "auto") - args[4] = structure_literal; - } - } -} - -ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - configuration.update(context); - if (configuration.format == "auto") - return StorageS3::getTableStructureAndFormatFromData(configuration, std::nullopt, context).first; - - return StorageS3::getTableStructureFromData(configuration, std::nullopt, context); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionS3::getVirtualsToCheckBeforeUsingStructureHint() const -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); -} - -StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool /*is_insert_query*/) const -{ - S3::URI s3_uri (configuration.url); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - else if (!cached_columns.empty()) - columns = cached_columns; - - StoragePtr storage = std::make_shared( - configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function S3 - std::nullopt); - - storage->startup(); - - return storage; -} - - -class TableFunctionGCS : public TableFunctionS3 -{ -public: - static constexpr auto name = "gcs"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "GCS"; } -}; - -class TableFunctionCOS : public TableFunctionS3 -{ -public: - static constexpr auto name = "cosn"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "COSN"; } -}; - -class TableFunctionOSS : public TableFunctionS3 -{ -public: - static constexpr auto name = "oss"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "OSS"; } -}; - - -void registerTableFunctionGCS(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = 
{.description=R"(The table function can be used to read the data stored on Google Cloud Storage.)", - .examples{{"gcs", "SELECT * FROM gcs(url, hmac_key, hmac_secret)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -void registerTableFunctionS3(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on AWS S3.)", - .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - - -void registerTableFunctionCOS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -void registerTableFunctionOSS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h deleted file mode 100644 index 00ca36c6653..00000000000 --- a/src/TableFunctions/TableFunctionS3.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include - - -namespace DB -{ - -class Context; - -/* s3(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in S3. - */ -class TableFunctionS3 : public ITableFunction -{ -public: - static constexpr auto name = "s3"; - static constexpr auto signature = " - url\n" - " - url, format\n" - " - url, format, structure\n" - " - url, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key\n" - " - url, access_key_id, secret_access_key, session_token\n" - " - url, access_key_id, secret_access_key, format\n" - " - url, access_key_id, secret_access_key, session_token, format\n" - " - url, access_key_id, secret_access_key, format, structure\n" - " - url, access_key_id, secret_access_key, session_token, format, structure\n" - " - url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - static size_t getMaxNumberOfArguments() { return 6; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable 
StorageS3::Configuration configuration; - ColumnsDescription structure_hint; - -private: - - std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp deleted file mode 100644 index e727c4e4c89..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionS3Cluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const -{ - StoragePtr storage; - ColumnsDescription columns; - - if (configuration.structure != "auto") - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs - storage = std::make_shared( - configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings for S3Cluster - /*distributed_processing=*/true); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionS3Cluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h deleted file mode 100644 index 718b0d90de8..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure, compression_method) - * A table function, which allows to process many files from S3 on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in S3 file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. 
- */ -class TableFunctionS3Cluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "s3Cluster"; - static constexpr auto signature = " - cluster, url\n" - " - cluster, url, format\n" - " - cluster, url, format, structure\n" - " - cluster, url, access_key_id, secret_access_key\n" - " - cluster, url, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, format\n" - " - cluster, url, access_key_id, secret_access_key, format, structure\n" - " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3Cluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 7b2a61c25eb..4b56fa57091 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; } @@ -112,7 +112,7 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args = args_func.at(0)->children; if (args.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires at least 1 argument", getName()); + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Table function '{}' requires at least 1 argument", getName()); const auto & literal = args[0]->as(); String value; diff --git a/src/TableFunctions/registerDataLakeTableFunctions.cpp b/src/TableFunctions/registerDataLakeTableFunctions.cpp new file mode 100644 index 00000000000..15a6668f434 --- /dev/null +++ b/src/TableFunctions/registerDataLakeTableFunctions.cpp @@ -0,0 +1,69 @@ +#include +#include + +namespace DB +{ + +#if USE_AWS_S3 +#if USE_AVRO +void registerTableFunctionIceberg(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Iceberg table stored on object store.)", + .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +#if USE_PARQUET +void registerTableFunctionDeltaLake(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", + .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerTableFunctionHudi(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Hudi 
table stored on object store.)", + .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerDataLakeTableFunctions(TableFunctionFactory & factory) +{ + UNUSED(factory); +#if USE_AWS_S3 +#if USE_AVRO + registerTableFunctionIceberg(factory); +#endif +#if USE_PARQUET + registerTableFunctionDeltaLake(factory); +#endif + registerTableFunctionHudi(factory); +#endif +} + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 927457ff9f6..ca4913898f9 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -11,6 +11,7 @@ void registerTableFunctions() registerTableFunctionMerge(factory); registerTableFunctionRemote(factory); registerTableFunctionNumbers(factory); + registerTableFunctionLoop(factory); registerTableFunctionGenerateSeries(factory); registerTableFunctionNull(factory); registerTableFunctionZeros(factory); @@ -29,27 +30,6 @@ void registerTableFunctions() registerTableFunctionFuzzJSON(factory); #endif -#if USE_AWS_S3 - registerTableFunctionS3(factory); - registerTableFunctionS3Cluster(factory); - registerTableFunctionCOS(factory); - registerTableFunctionOSS(factory); - registerTableFunctionGCS(factory); - registerTableFunctionHudi(factory); -#if USE_PARQUET - registerTableFunctionDeltaLake(factory); -#endif -#if USE_AVRO - registerTableFunctionIceberg(factory); -#endif - -#endif - -#if USE_HDFS - registerTableFunctionHDFS(factory); - registerTableFunctionHDFSCluster(factory); -#endif - #if USE_HIVE registerTableFunctionHive(factory); #endif @@ -77,12 +57,9 @@ void registerTableFunctions() registerTableFunctionFormat(factory); registerTableFunctionExplain(factory); -#if USE_AZURE_BLOB_STORAGE - registerTableFunctionAzureBlobStorage(factory); - registerTableFunctionAzureBlobStorageCluster(factory); -#endif - - + registerTableFunctionObjectStorage(factory); + registerTableFunctionObjectStorageCluster(factory); + registerDataLakeTableFunctions(factory); } } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 296af146faf..efde4d6dcdc 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -8,6 +8,7 @@ class TableFunctionFactory; void registerTableFunctionMerge(TableFunctionFactory & factory); void registerTableFunctionRemote(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); +void registerTableFunctionLoop(TableFunctionFactory & factory); void registerTableFunctionGenerateSeries(TableFunctionFactory & factory); void registerTableFunctionNull(TableFunctionFactory & factory); void registerTableFunctionZeros(TableFunctionFactory & factory); @@ -32,18 +33,6 @@ void registerTableFunctionS3Cluster(TableFunctionFactory & factory); void registerTableFunctionCOS(TableFunctionFactory & factory); void registerTableFunctionOSS(TableFunctionFactory & factory); void registerTableFunctionGCS(TableFunctionFactory & factory); -void registerTableFunctionHudi(TableFunctionFactory & factory); -#if USE_PARQUET -void registerTableFunctionDeltaLake(TableFunctionFactory & factory); -#endif -#if USE_AVRO -void registerTableFunctionIceberg(TableFunctionFactory & factory); -#endif -#endif - -#if USE_HDFS -void registerTableFunctionHDFS(TableFunctionFactory & factory); -void registerTableFunctionHDFSCluster(TableFunctionFactory & 
factory); #endif #if USE_HIVE @@ -74,10 +63,9 @@ void registerTableFunctionFormat(TableFunctionFactory & factory); void registerTableFunctionExplain(TableFunctionFactory & factory); -#if USE_AZURE_BLOB_STORAGE -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory); -void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory); -#endif +void registerTableFunctionObjectStorage(TableFunctionFactory & factory); +void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory); +void registerDataLakeTableFunctions(TableFunctionFactory & factory); void registerTableFunctions(); diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 7aaf18e7765..d41fdaf05ff 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -109,12 +109,12 @@ def main(): test_script = jobs_scripts[test_job] if report_file.exists(): report_file.unlink() - extra_timeout_option = "" - if test_job == JobNames.STATELESS_TEST_RELEASE: - extra_timeout_option = str(3600) # "bugfix" must be present in checkname, as integration test runner checks this check_name = f"Validate bugfix: {test_job}" - command = f"python3 {test_script} '{check_name}' {extra_timeout_option} --validate-bugfix --report-to-file {report_file}" + command = ( + f"python3 {test_script} '{check_name}' " + f"--validate-bugfix --report-to-file {report_file}" + ) print(f"Going to validate job [{test_job}], command [{command}]") _ = subprocess.run( command, diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index cc8e226e495..1d734fbb3f8 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +import json import logging import os import sys @@ -13,6 +13,8 @@ from env_helper import ( GITHUB_SERVER_URL, REPORT_PATH, TEMP_PATH, + CI_CONFIG_PATH, + CI, ) from pr_info import PRInfo from report import ( @@ -53,6 +55,18 @@ def main(): release=pr_info.is_release, backport=pr_info.head_ref.startswith("backport/"), ) + if CI: + # In CI only specific builds might be manually selected, or some wf does not build all builds. 
+ # Filtering @builds_for_check to verify only builds that are present in the current CI workflow + with open(CI_CONFIG_PATH, encoding="utf-8") as jfd: + ci_config = json.load(jfd) + all_ci_jobs = ( + ci_config["jobs_data"]["jobs_to_skip"] + + ci_config["jobs_data"]["jobs_to_do"] + ) + builds_for_check = [job for job in builds_for_check if job in all_ci_jobs] + print(f"NOTE: following build reports will be accounted: [{builds_for_check}]") + required_builds = len(builds_for_check) missing_builds = 0 diff --git a/tests/ci/cache_utils.py b/tests/ci/cache_utils.py index a0692f4eff2..5a295fc66ca 100644 --- a/tests/ci/cache_utils.py +++ b/tests/ci/cache_utils.py @@ -197,7 +197,6 @@ class CargoCache(Cache): logging.info("Cache for Cargo.lock md5 %s will be uploaded", self.lock_hash) self._force_upload_cache = True self.directory.mkdir(parents=True, exist_ok=True) - return def upload(self): self._upload(f"{self.PREFIX}/{self.archive_name}", self._force_upload_cache) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 9ee884c801a..578ade5c8a0 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -9,7 +9,7 @@ from threading import Thread from typing import Any, Dict, List, Optional import requests -from lambda_shared.pr import Labels, check_pr_description +from lambda_shared.pr import Labels from lambda_shared.token import get_cached_access_token NEED_RERUN_OR_CANCELL_WORKFLOWS = { @@ -321,21 +321,21 @@ def main(event): return if action == "edited": - print("PR is edited, check if the body is correct") - error, _ = check_pr_description( - pull_request["body"], pull_request["base"]["repo"]["full_name"] - ) - if error: - print( - f"The PR's body is wrong, is going to comment it. The error is: {error}" - ) - post_json = { - "body": "This is an automatic comment. The PR descriptions does not " - f"match the [template]({pull_request['base']['repo']['html_url']}/" - "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1).\n\n" - f"Please, edit it accordingly.\n\nThe error is: {error}" - } - _exec_post_with_retry(pull_request["comments_url"], token, json=post_json) + print("PR is edited - do nothing") + # error, _ = check_pr_description( + # pull_request["body"], pull_request["base"]["repo"]["full_name"] + # ) + # if error: + # print( + # f"The PR's body is wrong, is going to comment it. The error is: {error}" + # ) + # post_json = { + # "body": "This is an automatic comment. The PR descriptions does not " + # f"match the [template]({pull_request['base']['repo']['html_url']}/" + # "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1).\n\n" + # f"Please, edit it accordingly.\n\nThe error is: {error}" + # } + # _exec_post_with_retry(pull_request["comments_url"], token, json=post_json) return if action == "synchronize": diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 7f267d5ed1a..e470621e2c5 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -33,9 +33,10 @@ from subprocess import CalledProcessError from typing import List, Optional import __main__ + from env_helper import TEMP_PATH from get_robot_token import get_best_robot_token -from git_helper import git_runner, is_shallow +from git_helper import GIT_PREFIX, git_runner, is_shallow from github_helper import GitHub, PullRequest, PullRequests, Repository from lambda_shared_package.lambda_shared.pr import Labels from ssh import SSHKey @@ -90,7 +91,7 @@ close it. 
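Stepping back to the build_report_check.py hunk above: a minimal sketch of the filtering it performs, assuming only the CI_CONFIG_PATH JSON layout that the hunk itself reads (jobs_data.jobs_to_do and jobs_data.jobs_to_skip):

```python
import json


def filter_builds_for_check(builds_for_check, ci_config_path):
    """Keep only the build jobs that the current CI workflow actually schedules."""
    with open(ci_config_path, encoding="utf-8") as fd:
        ci_config = json.load(fd)
    all_ci_jobs = (
        ci_config["jobs_data"]["jobs_to_skip"] + ci_config["jobs_data"]["jobs_to_do"]
    )
    return [job for job in builds_for_check if job in all_ci_jobs]
```

With a manually reduced workflow, builds that were never scheduled are then simply not counted as missing reports.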
name: str, pr: PullRequest, repo: Repository, - backport_created_label: str = Labels.PR_BACKPORTS_CREATED, + backport_created_label: str, ): self.name = name self.pr = pr @@ -104,10 +105,6 @@ close it. self.backport_created_label = backport_created_label - self.git_prefix = ( # All commits to cherrypick are done as robot-clickhouse - "git -c user.email=robot-clickhouse@users.noreply.github.com " - "-c user.name=robot-clickhouse -c commit.gpgsign=false" - ) self.pre_check() def pre_check(self): @@ -118,11 +115,12 @@ close it. if branch_updated: self._backported = True - def pop_prs(self, prs: PullRequests) -> None: + def pop_prs(self, prs: PullRequests) -> PullRequests: """the method processes all prs and pops the ReleaseBranch related prs""" to_pop = [] # type: List[int] for i, pr in enumerate(prs): if self.name not in pr.head.ref: + # this pr is not for the current branch continue if pr.head.ref.startswith(f"cherrypick/{self.name}"): self.cherrypick_pr = pr @@ -131,19 +129,22 @@ close it. self.backport_pr = pr to_pop.append(i) else: - logging.error( - "head ref of PR #%s isn't starting with known suffix", - pr.number, - ) + assert False, f"BUG! Invalid PR's branch [{pr.head.ref}]" + + # Cherry-pick or backport PR found, set @backported flag for current release branch + self._backported = True + for i in reversed(to_pop): # Going from the tail to keep the order and pop greater index first prs.pop(i) + return prs def process( # pylint: disable=too-many-return-statements self, dry_run: bool ) -> None: if self.backported: return + if not self.cherrypick_pr: if dry_run: logging.info( @@ -151,56 +152,54 @@ close it. ) return self.create_cherrypick() - if self.backported: - return - if self.cherrypick_pr is not None: - # Try to merge cherrypick instantly - if self.cherrypick_pr.mergeable and self.cherrypick_pr.state != "closed": - if dry_run: - logging.info( - "DRY RUN: Would merge cherry-pick PR for #%s", self.pr.number - ) - return - self.cherrypick_pr.merge() - # The PR needs update, since PR.merge doesn't update the object - self.cherrypick_pr.update() - if self.cherrypick_pr.merged: - if dry_run: - logging.info( - "DRY RUN: Would create backport PR for #%s", self.pr.number - ) - return - self.create_backport() - return - if self.cherrypick_pr.state == "closed": + assert self.cherrypick_pr, "BUG!" 
+ + if self.cherrypick_pr.mergeable and self.cherrypick_pr.state != "closed": + if dry_run: logging.info( - "The cherrypick PR #%s for PR #%s is discarded", - self.cherrypick_pr.number, - self.pr.number, + "DRY RUN: Would merge cherry-pick PR for #%s", self.pr.number ) - self._backported = True return + self.cherrypick_pr.merge() + # The PR needs update, since PR.merge doesn't update the object + self.cherrypick_pr.update() + if self.cherrypick_pr.merged: + if dry_run: + logging.info( + "DRY RUN: Would create backport PR for #%s", self.pr.number + ) + return + self.create_backport() + return + if self.cherrypick_pr.state == "closed": logging.info( - "Cherrypick PR #%s for PR #%s have conflicts and unable to be merged", + "The cherry-pick PR #%s for PR #%s is discarded", self.cherrypick_pr.number, self.pr.number, ) - self.ping_cherry_pick_assignees(dry_run) + self._backported = True + return + logging.info( + "Cherry-pick PR #%s for PR #%s has conflicts and unable to be merged", + self.cherrypick_pr.number, + self.pr.number, + ) + self.ping_cherry_pick_assignees(dry_run) def create_cherrypick(self): # First, create backport branch: # Checkout release branch with discarding every change - git_runner(f"{self.git_prefix} checkout -f {self.name}") + git_runner(f"{GIT_PREFIX} checkout -f {self.name}") # Create or reset backport branch - git_runner(f"{self.git_prefix} checkout -B {self.backport_branch}") + git_runner(f"{GIT_PREFIX} checkout -B {self.backport_branch}") # Merge all changes from PR's the first parent commit w/o applying anything # It will allow to create a merge commit like it would be a cherry-pick first_parent = git_runner(f"git rev-parse {self.pr.merge_commit_sha}^1") - git_runner(f"{self.git_prefix} merge -s ours --no-edit {first_parent}") + git_runner(f"{GIT_PREFIX} merge -s ours --no-edit {first_parent}") # Second step, create cherrypick branch git_runner( - f"{self.git_prefix} branch -f " + f"{GIT_PREFIX} branch -f " f"{self.cherrypick_branch} {self.pr.merge_commit_sha}" ) @@ -209,7 +208,7 @@ close it. # manually to the release branch already try: output = git_runner( - f"{self.git_prefix} merge --no-commit --no-ff {self.cherrypick_branch}" + f"{GIT_PREFIX} merge --no-commit --no-ff {self.cherrypick_branch}" ) # 'up-to-date', 'up to date', who knows what else (╯°v°)╯ ^┻━┻ if output.startswith("Already up") and output.endswith("date."): @@ -219,18 +218,17 @@ close it. self.name, self.pr.number, ) - self._backported = True return except CalledProcessError: # There are most probably conflicts, they'll be resolved in PR - git_runner(f"{self.git_prefix} reset --merge") + git_runner(f"{GIT_PREFIX} reset --merge") else: # There are changes to apply, so continue - git_runner(f"{self.git_prefix} reset --merge") + git_runner(f"{GIT_PREFIX} reset --merge") - # Push, create the cherrypick PR, lable and assign it + # Push, create the cherry-pick PR, label and assign it for branch in [self.cherrypick_branch, self.backport_branch]: - git_runner(f"{self.git_prefix} push -f {self.REMOTE} {branch}:{branch}") + git_runner(f"{GIT_PREFIX} push -f {self.REMOTE} {branch}:{branch}") self.cherrypick_pr = self.repo.create_pull( title=f"Cherry pick #{self.pr.number} to {self.name}: {self.pr.title}", @@ -245,6 +243,11 @@ close it. 
) self.cherrypick_pr.add_to_labels(Labels.PR_CHERRYPICK) self.cherrypick_pr.add_to_labels(Labels.DO_NOT_TEST) + if Labels.PR_CRITICAL_BUGFIX in [label.name for label in self.pr.labels]: + self.cherrypick_pr.add_to_labels(Labels.PR_CRITICAL_BUGFIX) + elif Labels.PR_BUGFIX in [label.name for label in self.pr.labels]: + self.cherrypick_pr.add_to_labels(Labels.PR_BUGFIX) + self._backported = True self._assign_new_pr(self.cherrypick_pr) # update cherrypick PR to get the state for PR.mergable self.cherrypick_pr.update() @@ -254,21 +257,19 @@ close it. # Checkout the backport branch from the remote and make all changes to # apply like they are only one cherry-pick commit on top of release logging.info("Creating backport for PR #%s", self.pr.number) - git_runner(f"{self.git_prefix} checkout -f {self.backport_branch}") - git_runner( - f"{self.git_prefix} pull --ff-only {self.REMOTE} {self.backport_branch}" - ) + git_runner(f"{GIT_PREFIX} checkout -f {self.backport_branch}") + git_runner(f"{GIT_PREFIX} pull --ff-only {self.REMOTE} {self.backport_branch}") merge_base = git_runner( - f"{self.git_prefix} merge-base " + f"{GIT_PREFIX} merge-base " f"{self.REMOTE}/{self.name} {self.backport_branch}" ) - git_runner(f"{self.git_prefix} reset --soft {merge_base}") + git_runner(f"{GIT_PREFIX} reset --soft {merge_base}") title = f"Backport #{self.pr.number} to {self.name}: {self.pr.title}" - git_runner(f"{self.git_prefix} commit --allow-empty -F -", input=title) + git_runner(f"{GIT_PREFIX} commit --allow-empty -F -", input=title) # Push with force, create the backport PR, lable and assign it git_runner( - f"{self.git_prefix} push -f {self.REMOTE} " + f"{GIT_PREFIX} push -f {self.REMOTE} " f"{self.backport_branch}:{self.backport_branch}" ) self.backport_pr = self.repo.create_pull( @@ -280,6 +281,10 @@ close it. head=self.backport_branch, ) self.backport_pr.add_to_labels(Labels.PR_BACKPORT) + if Labels.PR_CRITICAL_BUGFIX in [label.name for label in self.pr.labels]: + self.backport_pr.add_to_labels(Labels.PR_CRITICAL_BUGFIX) + elif Labels.PR_BUGFIX in [label.name for label in self.pr.labels]: + self.backport_pr.add_to_labels(Labels.PR_BUGFIX) self._assign_new_pr(self.backport_pr) def ping_cherry_pick_assignees(self, dry_run: bool) -> None: @@ -335,7 +340,7 @@ close it. 
@property def backported(self) -> bool: - return self._backported or self.backport_pr is not None + return self._backported def __repr__(self): return self.name @@ -348,16 +353,22 @@ class Backport: repo: str, fetch_from: Optional[str], dry_run: bool, - must_create_backport_labels: List[str], - backport_created_label: str, ): self.gh = gh self._repo_name = repo self._fetch_from = fetch_from self.dry_run = dry_run - self.must_create_backport_labels = must_create_backport_labels - self.backport_created_label = backport_created_label + self.must_create_backport_label = ( + Labels.MUST_BACKPORT + if self._repo_name == self._fetch_from + else Labels.MUST_BACKPORT_CLOUD + ) + self.backport_created_label = ( + Labels.PR_BACKPORTS_CREATED + if self._repo_name == self._fetch_from + else Labels.PR_BACKPORTS_CREATED_CLOUD + ) self._remote = "" self._remote_line = "" @@ -457,7 +468,7 @@ class Backport: query_args = { "query": f"type:pr repo:{self._fetch_from} -label:{self.backport_created_label}", "label": ",".join( - self.labels_to_backport + self.must_create_backport_labels + self.labels_to_backport + [self.must_create_backport_label] ), "merged": [since_date, tomorrow], } @@ -474,23 +485,19 @@ class Backport: self.process_pr(pr) except Exception as e: logging.error( - "During processing the PR #%s error occured: %s", pr.number, e + "During processing the PR #%s error occurred: %s", pr.number, e ) self.error = e def process_pr(self, pr: PullRequest) -> None: pr_labels = [label.name for label in pr.labels] - for label in self.must_create_backport_labels: - # We backport any vXXX-must-backport to all branches of the fetch repo (better than no backport) - if label in pr_labels or self._fetch_from: - branches = [ - ReleaseBranch(br, pr, self.repo, self.backport_created_label) - for br in self.release_branches - ] # type: List[ReleaseBranch] - break - - if not branches: + if self.must_create_backport_label in pr_labels: + branches = [ + ReleaseBranch(br, pr, self.repo, self.backport_created_label) + for br in self.release_branches + ] # type: List[ReleaseBranch] + else: branches = [ ReleaseBranch(br, pr, self.repo, self.backport_created_label) for br in [ @@ -499,20 +506,14 @@ class Backport: if label in self.labels_to_backport ] ] - if not branches: - # This is definitely some error. There must be at least one branch - # It also make the whole program exit code non-zero - self.error = Exception( - f"There are no branches to backport PR #{pr.number}, logical error" - ) - raise self.error + assert branches, "BUG!" logging.info( " PR #%s is supposed to be backported to %s", pr.number, ", ".join(map(str, branches)), ) - # All PRs for cherrypick and backport branches as heads + # All PRs for cherry-pick and backport branches as heads query_suffix = " ".join( [ f"head:{branch.backport_branch} head:{branch.cherrypick_branch}" @@ -524,29 +525,15 @@ class Backport: label=f"{Labels.PR_BACKPORT},{Labels.PR_CHERRYPICK}", ) for br in branches: - br.pop_prs(bp_cp_prs) - - if bp_cp_prs: - # This is definitely some error. All prs must be consumed by - # branches with ReleaseBranch.pop_prs. It also makes the whole - # program exit code non-zero - self.error = Exception( - "The following PRs are not filtered by release branches:\n" - "\n".join(map(str, bp_cp_prs)) - ) - raise self.error - - if all(br.backported for br in branches): - # Let's check if the PR is already backported - self.mark_pr_backported(pr) - return + bp_cp_prs = br.pop_prs(bp_cp_prs) + assert not bp_cp_prs, "BUG!" 
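The Backport changes above drop the CLI-provided labels and derive them from whether PRs are fetched from the same repository; a rough sketch of that selection follows. The label strings are illustrative stand-ins for the constants in lambda_shared.pr.Labels, and labels_to_backport is simplified to a plain mapping from label to branch:

```python
def pick_backport_labels(repo_name, fetch_from):
    # Same repo: use the regular labels; different source repo: use the *_CLOUD variants.
    same_repo = repo_name == fetch_from
    must_backport = "pr-must-backport" if same_repo else "pr-must-backport-cloud"
    created = "pr-backports-created" if same_repo else "pr-backports-created-cloud"
    return must_backport, created


def branches_for_pr(pr_labels, must_backport_label, labels_to_backport, release_branches):
    # Backport to every release branch when the "must backport" label is present,
    # otherwise only to the branches named by version-specific labels.
    if must_backport_label in pr_labels:
        return list(release_branches)
    return [labels_to_backport[label] for label in pr_labels if label in labels_to_backport]
```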
for br in branches: br.process(self.dry_run) - if all(br.backported for br in branches): - # And check it after the running - self.mark_pr_backported(pr) + for br in branches: + assert br.backported, f"BUG! backport to branch [{br}] failed" + self.mark_pr_backported(pr) def mark_pr_backported(self, pr: PullRequest) -> None: if self.dry_run: @@ -583,19 +570,6 @@ def parse_args(): ) parser.add_argument("--dry-run", action="store_true", help="do not create anything") - parser.add_argument( - "--must-create-backport-label", - default=Labels.MUST_BACKPORT, - choices=(Labels.MUST_BACKPORT, Labels.MUST_BACKPORT_CLOUD), - help="label to filter PRs to backport", - nargs="+", - ) - parser.add_argument( - "--backport-created-label", - default=Labels.PR_BACKPORTS_CREATED, - choices=(Labels.PR_BACKPORTS_CREATED, Labels.PR_BACKPORTS_CREATED_CLOUD), - help="label to mark PRs as backported", - ) parser.add_argument( "--reserve-search-days", default=0, @@ -660,10 +634,6 @@ def main(): args.repo, args.from_repo, args.dry_run, - args.must_create_backport_label - if isinstance(args.must_create_backport_label, list) - else [args.must_create_backport_label], - args.backport_created_label, ) # https://github.com/python/mypy/issues/3004 bp.gh.cache_path = temp_path / "gh_cache" diff --git a/tests/ci/ci.py b/tests/ci/ci.py index a95ada628ff..55a18a2f335 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -3,22 +3,26 @@ import concurrent.futures import json import logging import os -import random import re import subprocess import sys -import time -from copy import deepcopy -from dataclasses import asdict, dataclass -from enum import Enum +from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union +from typing import Any, Dict, List, Optional import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames -from ci_utils import GHActions, is_hex, normalize_string +from ci_config import ( + CI_CONFIG, + Build, + CILabels, + CIStages, + JobNames, + StatusNames, +) +from ci_metadata import CiMetadata +from ci_utils import GHActions, normalize_string from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -34,16 +38,14 @@ from commit_status_helper import ( get_commit, post_commit_status, set_status_comment, - update_mergeable_check, ) -from digest_helper import DockerDigester, JobDigester +from digest_helper import DockerDigester from env_helper import ( CI, GITHUB_JOB_API_URL, - GITHUB_RUN_URL, + GITHUB_REPOSITORY, + GITHUB_RUN_ID, REPO_COPY, - REPORT_PATH, - S3_BUILDS_BUCKET, TEMP_PATH, ) from get_robot_token import get_best_robot_token @@ -51,930 +53,17 @@ from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner from github_helper import GitHub from pr_info import PRInfo -from report import ERROR, SUCCESS, BuildResult, JobReport +from report import ERROR, FAILURE, PENDING, SUCCESS, BuildResult, JobReport, TestResult from s3_helper import S3Helper +from stopwatch import Stopwatch +from tee_popen import TeePopen +from ci_cache import CiCache +from ci_settings import CiSettings from version_helper import get_version_from_repo # pylint: disable=too-many-lines -@dataclass -class PendingState: - updated_at: float - run_url: str - - -class CiCache: - """ - CI cache is a bunch of records. Record is a file stored under special location on s3. 
- The file name has following format - - _[]--___.ci - - RECORD_TYPE: - SUCCESSFUL - for successfuly finished jobs - PENDING - for pending jobs - - ATTRIBUTES: - release - for jobs being executed on the release branch including master branch (not a PR branch) - """ - - _S3_CACHE_PREFIX = "CI_cache_v1" - _CACHE_BUILD_REPORT_PREFIX = "build_report" - _RECORD_FILE_EXTENSION = ".ci" - _LOCAL_CACHE_PATH = Path(TEMP_PATH) / "ci_cache" - _ATTRIBUTE_RELEASE = "release" - # divider symbol 1 - _DIV1 = "--" - # divider symbol 2 - _DIV2 = "_" - assert _DIV1 != _DIV2 - - class RecordType(Enum): - SUCCESSFUL = "successful" - PENDING = "pending" - FAILED = "failed" - - @dataclass - class Record: - record_type: "CiCache.RecordType" - job_name: str - job_digest: str - batch: int - num_batches: int - release_branch: bool - file: str = "" - - def to_str_key(self): - """other fields must not be included in the hash str""" - return "_".join( - [self.job_name, self.job_digest, str(self.batch), str(self.num_batches)] - ) - - class JobType(Enum): - DOCS = "DOCS" - SRCS = "SRCS" - - @classmethod - def is_docs_job(cls, job_name: str) -> bool: - return job_name == JobNames.DOCS_CHECK - - @classmethod - def is_srcs_job(cls, job_name: str) -> bool: - return not cls.is_docs_job(job_name) - - @classmethod - def get_type_by_name(cls, job_name: str) -> "CiCache.JobType": - res = cls.SRCS - if cls.is_docs_job(job_name): - res = cls.DOCS - elif cls.is_srcs_job(job_name): - res = cls.SRCS - else: - assert False - return res - - def __init__( - self, - s3: S3Helper, - job_digests: Dict[str, str], - ): - self.s3 = s3 - self.job_digests = job_digests - self.cache_s3_paths = { - job_type: f"{self._S3_CACHE_PREFIX}/{job_type.value}-{self._get_digest_for_job_type(self.job_digests, job_type)}/" - for job_type in self.JobType - } - self.s3_record_prefixes = { - record_type: record_type.value for record_type in self.RecordType - } - self.records: Dict["CiCache.RecordType", Dict[str, "CiCache.Record"]] = { - record_type: {} for record_type in self.RecordType - } - - self.cache_updated = False - self.cache_data_fetched = True - if not self._LOCAL_CACHE_PATH.exists(): - self._LOCAL_CACHE_PATH.mkdir(parents=True, exist_ok=True) - - def _get_digest_for_job_type( - self, job_digests: Dict[str, str], job_type: JobType - ) -> str: - if job_type == self.JobType.DOCS: - res = job_digests[JobNames.DOCS_CHECK] - elif job_type == self.JobType.SRCS: - # any build type job has the same digest - pick up Build.PACKAGE_RELEASE or Build.PACKAGE_ASAN as a failover - # Build.PACKAGE_RELEASE may not exist in the list if we have reduced CI pipeline - if Build.PACKAGE_RELEASE in job_digests: - res = job_digests[Build.PACKAGE_RELEASE] - elif Build.PACKAGE_ASAN in job_digests: - # failover, if failover does not work - fix it! - res = job_digests[Build.PACKAGE_ASAN] - else: - assert False, "BUG, no build job in digest' list" - else: - assert False, "BUG, New JobType? 
- please update func" - return res - - def _get_record_file_name( - self, - record_type: RecordType, - job_name: str, - batch: int, - num_batches: int, - release_branch: bool, - ) -> str: - prefix = self.s3_record_prefixes[record_type] - prefix_extended = ( - self._DIV2.join([prefix, self._ATTRIBUTE_RELEASE]) - if release_branch - else prefix - ) - assert self._DIV1 not in job_name, f"Invalid job name {job_name}" - job_name = self._DIV2.join( - [job_name, self.job_digests[job_name], str(batch), str(num_batches)] - ) - file_name = self._DIV1.join([prefix_extended, job_name]) - file_name += self._RECORD_FILE_EXTENSION - return file_name - - def _get_record_s3_path(self, job_name: str) -> str: - return self.cache_s3_paths[self.JobType.get_type_by_name(job_name)] - - def _parse_record_file_name( - self, record_type: RecordType, file_name: str - ) -> Optional["CiCache.Record"]: - # validate filename - if ( - not file_name.endswith(self._RECORD_FILE_EXTENSION) - or not len(file_name.split(self._DIV1)) == 2 - ): - print("ERROR: wrong file name format") - return None - - file_name = file_name.removesuffix(self._RECORD_FILE_EXTENSION) - release_branch = False - - prefix_extended, job_suffix = file_name.split(self._DIV1) - record_type_and_attribute = prefix_extended.split(self._DIV2) - - # validate filename prefix - failure = False - if not 0 < len(record_type_and_attribute) <= 2: - print("ERROR: wrong file name prefix") - failure = True - if ( - len(record_type_and_attribute) > 1 - and record_type_and_attribute[1] != self._ATTRIBUTE_RELEASE - ): - print("ERROR: wrong record attribute") - failure = True - if record_type_and_attribute[0] != self.s3_record_prefixes[record_type]: - print("ERROR: wrong record type") - failure = True - if failure: - return None - - if ( - len(record_type_and_attribute) > 1 - and record_type_and_attribute[1] == self._ATTRIBUTE_RELEASE - ): - release_branch = True - - job_properties = job_suffix.split(self._DIV2) - job_name, job_digest, batch, num_batches = ( - self._DIV2.join(job_properties[:-3]), - job_properties[-3], - int(job_properties[-2]), - int(job_properties[-1]), - ) - - if not is_hex(job_digest): - print("ERROR: wrong record job digest") - return None - - record = self.Record( - record_type, - job_name, - job_digest, - batch, - num_batches, - release_branch, - file="", - ) - return record - - def print_status(self): - for record_type in self.RecordType: - GHActions.print_in_group( - f"Cache records: [{record_type}]", list(self.records[record_type]) - ) - return self - - def update(self): - """ - Pulls cache records from s3. Only records name w/o content. 
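The CiCache docstring above describes the on-S3 record file naming, and _get_record_file_name in the removed code composes it; a small self-contained sketch of that scheme, with placeholders reconstructed from the removed implementation:

```python
def record_file_name(record_type, job_name, job_digest, batch, num_batches, release_branch):
    # <record_type>[_release]--<job_name>_<job_digest>_<batch>_<num_batches>.ci
    prefix = f"{record_type}_release" if release_branch else record_type
    suffix = "_".join([job_name, job_digest, str(batch), str(num_batches)])
    return f"{prefix}--{suffix}.ci"


# For example:
#   record_file_name("successful", "Stateless tests (asan)", "a1b2c3", 0, 2, True)
#   -> "successful_release--Stateless tests (asan)_a1b2c3_0_2.ci"
# The in-memory lookup key (Record.to_str_key) keeps only job_name, digest, batch
# and num_batches, so release and non-release records share one key.
```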
- """ - for record_type in self.RecordType: - prefix = self.s3_record_prefixes[record_type] - cache_list = self.records[record_type] - for job_type in self.JobType: - path = self.cache_s3_paths[job_type] - records = self.s3.list_prefix(f"{path}{prefix}", S3_BUILDS_BUCKET) - records = [record.split("/")[-1] for record in records] - for file in records: - record = self._parse_record_file_name( - record_type=record_type, file_name=file - ) - if not record: - print(f"ERROR: failed to parse cache record [{file}]") - continue - if ( - record.job_name not in self.job_digests - or self.job_digests[record.job_name] != record.job_digest - ): - # skip records we are not interested in - continue - - if record.to_str_key() not in cache_list: - cache_list[record.to_str_key()] = record - self.cache_data_fetched = False - elif ( - not cache_list[record.to_str_key()].release_branch - and record.release_branch - ): - # replace a non-release record with a release one - cache_list[record.to_str_key()] = record - self.cache_data_fetched = False - - self.cache_updated = True - return self - - def fetch_records_data(self): - """ - Pulls CommitStatusData for all cached jobs from s3 - """ - if not self.cache_updated: - self.update() - - if self.cache_data_fetched: - # there are no records without fetched data - no need to fetch - return self - - # clean up - for file in self._LOCAL_CACHE_PATH.glob("*.ci"): - file.unlink() - - # download all record files - for job_type in self.JobType: - path = self.cache_s3_paths[job_type] - for record_type in self.RecordType: - prefix = self.s3_record_prefixes[record_type] - _ = self.s3.download_files( - bucket=S3_BUILDS_BUCKET, - s3_path=f"{path}{prefix}", - file_suffix=self._RECORD_FILE_EXTENSION, - local_directory=self._LOCAL_CACHE_PATH, - ) - - # validate we have files for all records and save file names meanwhile - for record_type in self.RecordType: - record_list = self.records[record_type] - for _, record in record_list.items(): - record_file_name = self._get_record_file_name( - record_type, - record.job_name, - record.batch, - record.num_batches, - record.release_branch, - ) - assert ( - self._LOCAL_CACHE_PATH / record_file_name - ).is_file(), f"BUG. 
Record file must be present: {self._LOCAL_CACHE_PATH / record_file_name}" - record.file = record_file_name - - self.cache_data_fetched = True - return self - - def exist( - self, - record_type: "CiCache.RecordType", - job: str, - batch: int, - num_batches: int, - release_branch: bool, - ) -> bool: - if not self.cache_updated: - self.update() - record_key = self.Record( - record_type, - job, - self.job_digests[job], - batch, - num_batches, - release_branch, - ).to_str_key() - res = record_key in self.records[record_type] - if release_branch: - return res and self.records[record_type][record_key].release_branch - else: - return res - - def push( - self, - record_type: "CiCache.RecordType", - job: str, - batches: Union[int, Sequence[int]], - num_batches: int, - status: Union[CommitStatusData, PendingState], - release_branch: bool = False, - ) -> None: - """ - Pushes a cache record (CommitStatusData) - @release_branch adds "release" attribute to a record - """ - if isinstance(batches, int): - batches = [batches] - for batch in batches: - record_file = self._LOCAL_CACHE_PATH / self._get_record_file_name( - record_type, job, batch, num_batches, release_branch - ) - record_s3_path = self._get_record_s3_path(job) - if record_type == self.RecordType.SUCCESSFUL: - assert isinstance(status, CommitStatusData) - status.dump_to_file(record_file) - elif record_type == self.RecordType.FAILED: - assert isinstance(status, CommitStatusData) - status.dump_to_file(record_file) - elif record_type == self.RecordType.PENDING: - assert isinstance(status, PendingState) - with open(record_file, "w", encoding="utf-8") as json_file: - json.dump(asdict(status), json_file) - else: - assert False - - _ = self.s3.upload_file( - bucket=S3_BUILDS_BUCKET, - file_path=record_file, - s3_path=record_s3_path + record_file.name, - ) - record = self.Record( - record_type, - job, - self.job_digests[job], - batch, - num_batches, - release_branch, - file=record_file.name, - ) - if ( - record.release_branch - or record.to_str_key() not in self.records[record_type] - ): - self.records[record_type][record.to_str_key()] = record - - def get( - self, record_type: "CiCache.RecordType", job: str, batch: int, num_batches: int - ) -> Optional[Union[CommitStatusData, PendingState]]: - """ - Gets a cache record data for a job, or None if a cache miss - """ - - if not self.cache_data_fetched: - self.fetch_records_data() - - record_key = self.Record( - record_type, - job, - self.job_digests[job], - batch, - num_batches, - release_branch=False, - ).to_str_key() - - if record_key not in self.records[record_type]: - return None - - record_file_name = self.records[record_type][record_key].file - - res = CommitStatusData.load_from_file( - self._LOCAL_CACHE_PATH / record_file_name - ) # type: CommitStatusData - - return res - - def delete( - self, - record_type: "CiCache.RecordType", - job: str, - batch: int, - num_batches: int, - release_branch: bool, - ) -> None: - """ - deletes record from the cache - """ - raise NotImplementedError("Let's try make cache push-and-read-only") - # assert ( - # record_type == self.RecordType.PENDING - # ), "FIXME: delete is supported for pending records only" - # record_file_name = self._get_record_file_name( - # self.RecordType.PENDING, - # job, - # batch, - # num_batches, - # release_branch=release_branch, - # ) - # record_s3_path = self._get_record_s3_path(job) - # self.s3.delete_file_from_s3(S3_BUILDS_BUCKET, record_s3_path + record_file_name) - - # record_key = self.Record( - # record_type, - # job, - # 
self.job_digests[job], - # batch, - # num_batches, - # release_branch=False, - # ).to_str_key() - - # if record_key in self.records[record_type]: - # del self.records[record_type][record_key] - - def is_successful( - self, job: str, batch: int, num_batches: int, release_branch: bool - ) -> bool: - """ - checks if a given job have already been done successfuly - """ - return self.exist( - self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch - ) - - def is_failed( - self, job: str, batch: int, num_batches: int, release_branch: bool - ) -> bool: - """ - checks if a given job have already been done with failure - """ - return self.exist( - self.RecordType.FAILED, job, batch, num_batches, release_branch - ) - - def is_pending( - self, job: str, batch: int, num_batches: int, release_branch: bool - ) -> bool: - """ - check pending record in the cache for a given job - @release_branch - checks that "release" attribute is set for a record - """ - if self.is_successful( - job, batch, num_batches, release_branch - ) or self.is_failed(job, batch, num_batches, release_branch): - return False - - return self.exist( - self.RecordType.PENDING, job, batch, num_batches, release_branch - ) - - def push_successful( - self, - job: str, - batch: int, - num_batches: int, - job_status: CommitStatusData, - release_branch: bool = False, - ) -> None: - """ - Pushes a cache record (CommitStatusData) - @release_branch adds "release" attribute to a record - """ - self.push( - self.RecordType.SUCCESSFUL, - job, - [batch], - num_batches, - job_status, - release_branch, - ) - - def push_failed( - self, - job: str, - batch: int, - num_batches: int, - job_status: CommitStatusData, - release_branch: bool = False, - ) -> None: - """ - Pushes a cache record of type Failed (CommitStatusData) - @release_branch adds "release" attribute to a record - """ - self.push( - self.RecordType.FAILED, - job, - [batch], - num_batches, - job_status, - release_branch, - ) - - def push_pending( - self, job: str, batches: List[int], num_batches: int, release_branch: bool - ) -> None: - """ - pushes pending record for a job to the cache - """ - pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL) - self.push( - self.RecordType.PENDING, - job, - batches, - num_batches, - pending_state, - release_branch, - ) - - def get_successful( - self, job: str, batch: int, num_batches: int - ) -> Optional[CommitStatusData]: - """ - Gets a cache record (CommitStatusData) for a job, or None if a cache miss - """ - res = self.get(self.RecordType.SUCCESSFUL, job, batch, num_batches) - assert res is None or isinstance(res, CommitStatusData) - return res - - def delete_pending( - self, job: str, batch: int, num_batches: int, release_branch: bool - ) -> None: - """ - deletes pending record from the cache - """ - self.delete(self.RecordType.PENDING, job, batch, num_batches, release_branch) - - def download_build_reports(self, file_prefix: str = "") -> List[str]: - """ - not ideal class for this method, - but let it be as we store build reports in CI cache directory on s3 - and CiCache knows where exactly - - @file_prefix allows to filter out reports by git head_ref - """ - report_path = Path(REPORT_PATH) - report_path.mkdir(exist_ok=True, parents=True) - path = ( - self._get_record_s3_path(Build.PACKAGE_RELEASE) - + self._CACHE_BUILD_REPORT_PREFIX - ) - if file_prefix: - path += "_" + file_prefix - reports_files = self.s3.download_files( - bucket=S3_BUILDS_BUCKET, - s3_path=path, - file_suffix=".json", - local_directory=report_path, - ) - 
return reports_files - - def upload_build_report(self, build_result: BuildResult) -> str: - result_json_path = build_result.write_json(Path(TEMP_PATH)) - s3_path = ( - self._get_record_s3_path(Build.PACKAGE_RELEASE) + result_json_path.name - ) - return self.s3.upload_file( - bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path - ) - - def await_jobs( - self, jobs_with_params: Dict[str, Dict[str, Any]], is_release_branch: bool - ) -> Dict[str, List[int]]: - """ - await pending jobs to be finished - @jobs_with_params - jobs to await. {JOB_NAME: {"batches": [BATCHES...], "num_batches": NUM_BATCHES}} - returns successfully finished jobs: {JOB_NAME: [BATCHES...]} - """ - if not jobs_with_params: - return {} - poll_interval_sec = 300 - # TIMEOUT * MAX_ROUNDS_TO_WAIT must be less than 6h (GH job timeout) with a room for rest RunConfig work - TIMEOUT = 3000 # 50 min - MAX_ROUNDS_TO_WAIT = 6 - MAX_JOB_NUM_TO_WAIT = 3 - await_finished: Dict[str, List[int]] = {} - round_cnt = 0 - while ( - len(jobs_with_params) > MAX_JOB_NUM_TO_WAIT - and round_cnt < MAX_ROUNDS_TO_WAIT - ): - round_cnt += 1 - GHActions.print_in_group( - f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", - list(jobs_with_params), - ) - # this is initial approach to wait pending jobs: - # start waiting for the next TIMEOUT seconds if there are more than X(=4) jobs to wait - # wait TIMEOUT seconds in rounds. Y(=5) is the max number of rounds - expired_sec = 0 - start_at = int(time.time()) - while expired_sec < TIMEOUT and jobs_with_params: - time.sleep(poll_interval_sec) - self.update() - jobs_with_params_copy = deepcopy(jobs_with_params) - for job_name in jobs_with_params: - num_batches = jobs_with_params[job_name]["num_batches"] - job_config = CI_CONFIG.get_job_config(job_name) - for batch in jobs_with_params[job_name]["batches"]: - if self.is_pending( - job_name, - batch, - num_batches, - release_branch=is_release_branch - and job_config.required_on_release_branch, - ): - continue - print( - f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore" - ) - - # some_job_ready = True - jobs_with_params_copy[job_name]["batches"].remove(batch) - if not jobs_with_params_copy[job_name]["batches"]: - del jobs_with_params_copy[job_name] - - if not self.is_successful( - job_name, - batch, - num_batches, - release_branch=is_release_branch - and job_config.required_on_release_branch, - ): - print( - f"NOTE: Job [{job_name}:{batch}] finished but no success - remove from awaiting list, do not add to ready" - ) - continue - if job_name in await_finished: - await_finished[job_name].append(batch) - else: - await_finished[job_name] = [batch] - jobs_with_params = jobs_with_params_copy - expired_sec = int(time.time()) - start_at - print( - f"...awaiting continues... 
seconds left [{TIMEOUT - expired_sec}]" - ) - if await_finished: - GHActions.print_in_group( - f"Finished jobs, round [{round_cnt}]:", - [f"{job}:{batches}" for job, batches in await_finished.items()], - ) - GHActions.print_in_group( - "Remaining jobs:", - [f"{job}:{params['batches']}" for job, params in jobs_with_params.items()], - ) - return await_finished - - -@dataclass -class CiOptions: - # job will be included in the run if any keyword from the list matches job name - include_keywords: Optional[List[str]] = None - # job will be excluded in the run if any keyword from the list matches job name - exclude_keywords: Optional[List[str]] = None - - # list of specified preconfigured ci sets to run - ci_sets: Optional[List[str]] = None - # list of specified jobs to run - ci_jobs: Optional[List[str]] = None - - # btaches to run for all multi-batch jobs - job_batches: Optional[List[int]] = None - - do_not_test: bool = False - no_ci_cache: bool = False - no_merge_commit: bool = False - - def as_dict(self) -> Dict[str, Any]: - return asdict(self) - - @staticmethod - def create_from_run_config(run_config: Dict[str, Any]) -> "CiOptions": - return CiOptions(**run_config["ci_options"]) - - @staticmethod - def create_from_pr_message( - debug_message: Optional[str], update_from_api: bool - ) -> "CiOptions": - """ - Creates CiOptions instance based on tags found in PR body and/or commit message - @commit_message - may be provided directly for debugging purposes, otherwise it will be retrieved from git. - """ - res = CiOptions() - pr_info = PRInfo() - if ( - not pr_info.is_pr and not debug_message - ): # if commit_message is provided it's test/debug scenario - do not return - # CI options can be configured in PRs only - # if debug_message is provided - it's a test - return res - message = debug_message or GitRunner(set_cwd_to_git_root=True).run( - f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1" - ) - - pattern = r"(#|- \[x\] + Exclude: All with TSAN, MSAN, UBSAN, Coverage + pattern = r"(#|- \[x\] + Integration tests +- [x] Non required - [ ] Integration tests (arm64) - [x] Integration tests - [x] Integration tests @@ -32,7 +33,7 @@ _TEST_BODY_2 = """ - [x] MUST include azure - [x] no action must be applied - [ ] no action must be applied -- [x] MUST exclude tsan +- [x] MUST exclude tsan - [x] MUST exclude aarch64 - [x] MUST exclude test with analazer - [ ] no action applied @@ -53,6 +54,14 @@ _TEST_JOB_LIST = [ "Fast test", "package_release", "package_asan", + "package_aarch64", + "package_release_coverage", + "package_debug", + "package_tsan", + "package_msan", + "package_ubsan", + "binary_release", + "fuzzers", "Docker server image", "Docker keeper image", "Install packages (amd64)", @@ -128,22 +137,24 @@ _TEST_JOB_LIST = [ "Bugfix validation", ] +_TEST_JOB_LIST_2 = ["Style check", "Fast test", "fuzzers"] + class TestCIOptions(unittest.TestCase): def test_pr_body_parsing(self): - ci_options = CiOptions.create_from_pr_message( + ci_options = CiSettings.create_from_pr_message( _TEST_BODY_1, update_from_api=False ) self.assertFalse(ci_options.do_not_test) self.assertFalse(ci_options.no_ci_cache) self.assertTrue(ci_options.no_merge_commit) - self.assertEqual(ci_options.ci_sets, ["ci_set_integration"]) + self.assertEqual(ci_options.ci_sets, ["ci_set_non_required"]) self.assertCountEqual(ci_options.include_keywords, ["foo", "foo_bar"]) self.assertCountEqual(ci_options.exclude_keywords, ["foo", "foo_bar"]) def test_options_applied(self): self.maxDiff = None - ci_options = CiOptions.create_from_pr_message( 
+ ci_options = CiSettings.create_from_pr_message( _TEST_BODY_2, update_from_api=False ) self.assertCountEqual( @@ -152,26 +163,35 @@ class TestCIOptions(unittest.TestCase): ) self.assertCountEqual( ci_options.exclude_keywords, - ["tsan", "aarch64", "analyzer", "s3_storage", "coverage"], + ["tsan", "foobar", "aarch64", "analyzer", "s3_storage", "coverage"], ) - jobs_to_do = list(_TEST_JOB_LIST) - jobs_to_skip = [] - job_params = { - "Stateless tests (azure, asan)": { - "batches": list(range(3)), - "num_batches": 3, - "run_if_ci_option_include_set": True, - } - } - jobs_to_do, jobs_to_skip, job_params = ci_options.apply( - jobs_to_do, jobs_to_skip, job_params + + jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} + jobs_configs[ + "fuzzers" + ].run_by_label = ( + "TEST_LABEL" # check "fuzzers" appears in the result due to the label + ) + jobs_configs[ + "Integration tests (asan)" + ].release_only = ( + True # still must be included as it's set with include keywords + ) + filtered_jobs = list( + ci_options.apply( + jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + ) ) self.assertCountEqual( - jobs_to_do, + filtered_jobs, [ "Style check", + "fuzzers", "package_release", "package_asan", + "package_debug", + "package_msan", + "package_ubsan", "Stateless tests (asan)", "Stateless tests (azure, asan)", "Stateless tests flaky check (asan)", @@ -186,54 +206,88 @@ class TestCIOptions(unittest.TestCase): ) def test_options_applied_2(self): + jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST_2} + jobs_configs["Style check"].release_only = True + jobs_configs["Fast test"].pr_only = True + jobs_configs["fuzzers"].run_by_label = "TEST_LABEL" + # no settings are set + filtered_jobs = list( + CiSettings().apply(jobs_configs, is_release=False, is_pr=True, labels=[]) + ) + self.assertCountEqual( + filtered_jobs, + [ + "Fast test", + ], + ) + + filtered_jobs = list( + CiSettings().apply(jobs_configs, is_release=True, is_pr=False, labels=[]) + ) + self.assertCountEqual( + filtered_jobs, + [ + "Style check", + ], + ) + + def test_options_applied_3(self): + ci_settings = CiSettings() + ci_settings.include_keywords = ["Style"] + jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST_2} + jobs_configs["Style check"].release_only = True + jobs_configs["Fast test"].pr_only = True + # no settings are set + filtered_jobs = list( + ci_settings.apply( + jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + ) + ) + self.assertCountEqual( + filtered_jobs, + [ + "Style check", + ], + ) + + ci_settings.include_keywords = ["Fast"] + filtered_jobs = list( + ci_settings.apply( + jobs_configs, is_release=True, is_pr=False, labels=["TEST_LABEL"] + ) + ) + self.assertCountEqual( + filtered_jobs, + [ + "Style check", + ], + ) + + def test_options_applied_4(self): self.maxDiff = None - ci_options = CiOptions.create_from_pr_message( + ci_options = CiSettings.create_from_pr_message( _TEST_BODY_3, update_from_api=False ) self.assertCountEqual(ci_options.include_keywords, ["analyzer"]) self.assertIsNone(ci_options.exclude_keywords) - jobs_to_do = list(_TEST_JOB_LIST) - jobs_to_skip = [] - job_params = {} - jobs_to_do, jobs_to_skip, job_params = ci_options.apply( - jobs_to_do, jobs_to_skip, job_params + jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} + jobs_configs[ + "fuzzers" + ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result + jobs_configs["Integration tests (asan)"].release_only = True + filtered_jobs = list( + ci_options.apply( + 
jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + ) ) self.assertCountEqual( - jobs_to_do, + filtered_jobs, [ "Style check", "Integration tests (asan, old analyzer)", "package_release", "Stateless tests (release, old analyzer, s3, DatabaseReplicated)", "package_asan", + "fuzzers", ], ) - - def test_options_applied_3(self): - self.maxDiff = None - ci_options = CiOptions.create_from_pr_message( - _TEST_BODY_4, update_from_api=False - ) - self.assertIsNone(ci_options.include_keywords, None) - self.assertIsNone(ci_options.exclude_keywords, None) - jobs_to_do = list(_TEST_JOB_LIST) - jobs_to_skip = [] - job_params = {} - - for job in _TEST_JOB_LIST: - if "Stateless" in job: - job_params[job] = { - "batches": list(range(3)), - "num_batches": 3, - "run_if_ci_option_include_set": "azure" in job, - } - else: - job_params[job] = {"run_if_ci_option_include_set": False} - - jobs_to_do, jobs_to_skip, job_params = ci_options.apply( - jobs_to_do, jobs_to_skip, job_params - ) - self.assertNotIn( - "Stateless tests (azure, asan)", - jobs_to_do, - ) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index f649732171f..800bfcf52c3 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -165,6 +165,21 @@ class ClickHouseVersion: self._description = version_type self._describe = f"v{self.string}-{version_type}" + def copy(self) -> "ClickHouseVersion": + copy = ClickHouseVersion( + self.major, + self.minor, + self.patch, + self.revision, + self._git, + str(self.tweak), + ) + try: + copy.with_description(self.description) + except ValueError: + pass + return copy + def __eq__(self, other: Any) -> bool: if not isinstance(self, type(other)): return NotImplemented diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index de1d128dc87..ec7793db2aa 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -300,11 +300,44 @@ list_children () { echo "$children" } -while true; do - runner_pid=$(pgrep Runner.Listener) - echo "Got runner pid '$runner_pid'" +# There's possibility that it fails because the runner's version is outdated, +# so after the first failure we'll try to launch it with enabled autoupdate. +# +# We'll fail and terminate after 10 consequent failures. +ATTEMPT=0 +# In `kill` 0 means "all processes in process group", -1 is "all but PID 1" +# We use `-2` to get an error +RUNNER_PID=-2 + +while true; do + # Does not send signal, but checks that the process $RUNNER_PID is running + if kill -0 -- $RUNNER_PID; then + ATTEMPT=0 + echo "Runner is working with pid $RUNNER_PID, checking the metadata in background" + check_proceed_spot_termination + + if ! is_job_assigned; then + RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) )) + echo "The runner is launched $RUNNER_AGE seconds ago and still hasn't received a job" + if (( 60 < RUNNER_AGE )); then + echo "Attempt to delete the runner for a graceful shutdown" + sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ + || continue + echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" + terminate_and_exit + fi + fi + else + if [ "$RUNNER_PID" != "-2" ]; then + wait $RUNNER_PID \ + && echo "Runner with PID $RUNNER_PID successfully finished" \ + || echo "Attempt $((++ATTEMPT)) to start the runner" + fi + if (( ATTEMPT > 10 )); then + echo "The runner has failed to start after $ATTEMPT attempt. 
Give up and terminate it" + terminate_and_exit + fi - if [ -z "$runner_pid" ]; then cd $RUNNER_HOME || terminate_and_exit detect_delayed_termination # If runner is not active, check that it needs to terminate itself @@ -314,37 +347,50 @@ while true; do check_proceed_spot_termination force echo "Going to configure runner" - sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$(get_runner_token)" \ - --ephemeral --disableupdate --unattended \ - --runnergroup Default --labels "$LABELS" --work _work --name "$INSTANCE_ID" + token_args=(--token "$(get_runner_token)") + config_args=( + "${token_args[@]}" --url "$RUNNER_URL" + --ephemeral --unattended --replace --runnergroup Default + --labels "$LABELS" --work _work --name "$INSTANCE_ID" + ) + if (( ATTEMPT > 1 )); then + echo 'The runner failed to start at least once. Removing it and then configuring with autoupdate enabled.' + sudo -u ubuntu ./config.sh remove "${token_args[@]}" + sudo -u ubuntu ./config.sh "${config_args[@]}" + else + echo "Configure runner with disabled autoupdate" + config_args+=("--disableupdate") + sudo -u ubuntu ./config.sh "${config_args[@]}" + fi echo "Another one check to avoid race between runner and infrastructure" no_terminating_metadata || terminate_on_event check_spot_instance_is_old && terminate_and_exit check_proceed_spot_termination force + # There were some failures to start the Job because of trash in _work + rm -rf _work + + # https://github.com/actions/runner/issues/3266 + # We're unable to know if the runner is failed to start. + echo 'Monkey-patching run helpers to get genuine exit code of the runner' + for script in run.sh run-helper.sh.template; do + # shellcheck disable=SC2016 + grep -q 'exit 0$' "$script" && \ + sed 's/exit 0/exit $returnCode/' -i "$script" && \ + echo "Script $script is patched" + done + echo "Run" sudo -u ubuntu \ ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \ ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \ ./run.sh & - sleep 10 - else - echo "Runner is working with pid $runner_pid, checking the metadata in background" - check_proceed_spot_termination + RUNNER_PID=$! - if ! 
is_job_assigned; then - RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$runner_pid" 2>/dev/null || date +%s) )) - echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job" - if (( 60 < RUNNER_AGE )); then - echo "Attempt to delete the runner for a graceful shutdown" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ - || continue - echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" - terminate_and_exit - fi - fi + sleep 10 fi + sleep 5 done diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index effc224c2d5..3e2f33c89d1 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -9,7 +9,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.315.0 +export RUNNER_VERSION=2.316.1 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { @@ -91,6 +91,8 @@ apt-get install --yes --no-install-recommends azure-cli # Increase the limit on number of virtual memory mappings to aviod 'Cannot mmap' error echo "vm.max_map_count = 2097152" > /etc/sysctl.d/01-increase-map-counts.conf +# Workarond for sanitizers uncompatibility with some kernels, see https://github.com/google/sanitizers/issues/856 +echo "vm.mmap_rnd_bits=28" > /etc/sysctl.d/02-vm-mmap_rnd_bits.conf systemctl restart docker @@ -155,31 +157,56 @@ apt-get install tailscale --yes --no-install-recommends # Create a common script for the instances mkdir /usr/local/share/scripts -p -cat > /usr/local/share/scripts/init-network.sh << 'EOF' -#!/usr/bin/env bash +setup_cloudflare_dns() { + # Add cloudflare DNS as a fallback + # Get default gateway interface + local IFACE ETH_DNS CLOUDFLARE_NS new_dns + IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output) + # `Link 2 (eth0): 172.31.0.2` + ETH_DNS=$(resolvectl dns "$IFACE") || : + CLOUDFLARE_NS=1.1.1.1 + if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then + # Cut the leading legend + ETH_DNS=${ETH_DNS#*: } + # shellcheck disable=SC2206 + new_dns=(${ETH_DNS} "$CLOUDFLARE_NS") + resolvectl dns "$IFACE" "${new_dns[@]}" + fi +} -# Add cloudflare DNS as a fallback -# Get default gateway interface -IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output) -# `Link 2 (eth0): 172.31.0.2` -ETH_DNS=$(resolvectl dns "$IFACE") || : -CLOUDFLARE_NS=1.1.1.1 -if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then - # Cut the leading legend - ETH_DNS=${ETH_DNS#*: } - # shellcheck disable=SC2206 - new_dns=(${ETH_DNS} "$CLOUDFLARE_NS") - resolvectl dns "$IFACE" "${new_dns[@]}" -fi +setup_tailscale() { + # Setup tailscale, the very first action + local TS_API_CLIENT_ID TS_API_CLIENT_SECRET TS_AUTHKEY RUNNER_TYPE + TS_API_CLIENT_ID=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-id --query 'Parameter.Value' --output text --with-decryption) + TS_API_CLIENT_SECRET=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-secret --query 'Parameter.Value' --output text --with-decryption) -# Setup tailscale, the very first action -TS_API_CLIENT_ID=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-id --query 'Parameter.Value' --output text --with-decryption) -TS_API_CLIENT_SECRET=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-secret --query 'Parameter.Value' --output text --with-decryption) -export TS_API_CLIENT_ID 
TS_API_CLIENT_SECRET -TS_AUTHKEY=$(get-authkey -tags tag:svc-core-ci-github -reusable -ephemeral) -tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$INSTANCE_ID" + RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) + RUNNER_TYPE=${RUNNER_TYPE:-unknown} + # Clean possible garbage from the runner type + RUNNER_TYPE=${RUNNER_TYPE//[^0-9a-z]/-} + TS_AUTHKEY=$(TS_API_CLIENT_ID="$TS_API_CLIENT_ID" TS_API_CLIENT_SECRET="$TS_API_CLIENT_SECRET" \ + get-authkey -tags tag:svc-core-ci-github -ephemeral) + tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$RUNNER_TYPE-$INSTANCE_ID" +} + +cat > /usr/local/share/scripts/init-network.sh << EOF +!/usr/bin/env bash +$(declare -f setup_cloudflare_dns) + +$(declare -f setup_tailscale) + +# If the script is sourced, it will return now and won't execute functions +return 0 &>/dev/null || : + +echo Setup Cloudflare DNS +setup_cloudflare_dns + +echo Setup Tailscale VPN +setup_tailscale EOF +chmod +x /usr/local/share/scripts/init-network.sh + # The following line is used in aws TOE check. touch /var/tmp/clickhouse-ci-ami.success diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 35568ace72f..af203563d58 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -751,6 +751,7 @@ class SettingsRandomizer: "max_read_buffer_size": lambda: random.randint(500000, 1048576), "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), + "max_joined_block_size_rows": lambda: random.randint(8000, 100000), "max_threads": lambda: random.randint(1, 64), "optimize_append_index": lambda: random.randint(0, 1), "optimize_if_chain_to_multiif": lambda: random.randint(0, 1), @@ -908,7 +909,7 @@ class MergeTreeSettingsRandomizer: ), "cache_populated_by_fetch": lambda: random.randint(0, 1), "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100), - "old_parts_lifetime": threshold_generator(0.2, 0.3, 30, 8 * 60), + "old_parts_lifetime": threshold_generator(0.2, 0.3, 10, 8 * 60), } @staticmethod @@ -1222,12 +1223,9 @@ class TestCase: return FailureReason.S3_STORAGE elif ( tags - and ("no-s3-storage-with-slow-build" in tags) + and "no-s3-storage-with-slow-build" in tags and args.s3_storage - and ( - BuildFlags.THREAD in args.build_flags - or BuildFlags.DEBUG in args.build_flags - ) + and BuildFlags.RELEASE not in args.build_flags ): return FailureReason.S3_STORAGE @@ -2410,6 +2408,17 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): for _ in range(jobs): parallel_tests_array.append((None, batch_size, test_suite)) + # If we don't do random shuffling then there will be always + # nearly the same groups of test suites running concurrently. + # Thus, if there is a test within group which appears to be broken + # then it will affect all other tests in a non-random form. + # So each time a bad test fails - other tests from the group will also fail + # and this process will be more or less stable. + # It makes it more difficult to detect real flaky tests, + # because the distribution and the amount + # of failures will be nearly the same for all tests from the group. 
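The comment block above motivates the random.shuffle call on the next added line. As a rough standalone sketch (not the harness code itself), shuffling the test list before dealing it out to workers is what decorrelates the groups between runs:

    import random

    def split_into_batches(tests, jobs):
        # Shuffle first so one broken test does not drag the same neighbours
        # down run after run; only then deal the tests out round-robin.
        pool = list(tests)
        random.shuffle(pool)
        return [pool[i::jobs] for i in range(jobs)]

    # split_into_batches(["t1", "t2", "t3", "t4", "t5"], jobs=2)
    # -> e.g. [["t4", "t1", "t5"], ["t3", "t2"]], different on every run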
+ random.shuffle(test_suite.parallel_tests) + try: with closing(multiprocessing.Pool(processes=jobs)) as pool: pool.map_async(run_tests_array, parallel_tests_array) @@ -2548,15 +2557,15 @@ def reportLogStats(args): WITH 240 AS mins, ( - SELECT (count(), sum(length(message))) + SELECT (count(), sum(length(toValidUTF8(message)))) FROM system.text_log WHERE (now() - toIntervalMinute(mins)) < event_time ) AS total SELECT count() AS count, round(count / (total.1), 3) AS `count_%`, - formatReadableSize(sum(length(message))) AS size, - round(sum(length(message)) / (total.2), 3) AS `size_%`, + formatReadableSize(sum(length(toValidUTF8(message)))) AS size, + round(sum(length(toValidUTF8(message))) / (total.2), 3) AS `size_%`, countDistinct(logger_name) AS uniq_loggers, countDistinct(thread_id) AS uniq_threads, groupArrayDistinct(toString(level)) AS levels, @@ -2579,8 +2588,8 @@ def reportLogStats(args): 240 AS mins SELECT count() AS count, - substr(replaceRegexpAll(message, '[^A-Za-z]+', ''), 1, 32) AS pattern, - substr(any(message), 1, 256) as runtime_message, + substr(replaceRegexpAll(toValidUTF8(message), '[^A-Za-z]+', ''), 1, 32) AS pattern, + substr(any(toValidUTF8(message)), 1, 256) as runtime_message, any((extract(source_file, '/[a-zA-Z0-9_]+\\.[a-z]+'), source_line)) as line FROM system.text_log WHERE (now() - toIntervalMinute(mins)) < event_time AND message_format_string = '' @@ -2595,7 +2604,7 @@ def reportLogStats(args): print("\n") query = """ - SELECT message_format_string, count(), any(message) AS any_message + SELECT message_format_string, count(), any(toValidUTF8(message)) AS any_message FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (message NOT LIKE (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') AS s)) @@ -2630,8 +2639,8 @@ def reportLogStats(args): 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', 'Attempt to read after eof', 'String size is too big ({}), maximum: {}' ) AS known_short_messages - SELECT count() AS c, message_format_string, substr(any(message), 1, 120), - min(if(length(regexpExtract(message, '(.*)\\([A-Z0-9_]+\\)')) as prefix_len > 0, prefix_len, length(message)) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate + SELECT count() AS c, message_format_string, substr(any(toValidUTF8(message)), 1, 120), + min(if(length(regexpExtract(toValidUTF8(message), '(.*)\\([A-Z0-9_]+\\)')) as prefix_len > 0, prefix_len, length(toValidUTF8(message))) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (length(message_format_string) < 16 @@ -2950,7 +2959,7 @@ def parse_args(): parser.add_argument("--extract_from_config", help="extract-from-config program") parser.add_argument( - "--configclient", help="Client config (if you use not default ports)" + "--configclient", help="Client config (if you do not use default ports)" ) parser.add_argument( "--configserver", @@ -2970,7 +2979,7 @@ def parse_args(): parser.add_argument( "--global_time_limit", type=int, - help="Stop if executing more than specified time (after current test finished)", + help="Stop if executing more than specified time (after current test is finished)", ) parser.add_argument("test", nargs="*", help="Optional test case name regex") parser.add_argument( diff --git a/tests/config/config.d/block_number.xml b/tests/config/config.d/block_number.xml index b56f1f1afc2..4b08c37d2f5 100644 --- 
a/tests/config/config.d/block_number.xml +++ b/tests/config/config.d/block_number.xml @@ -1,6 +1,7 @@ - 0 + 0 + 0 diff --git a/tests/config/config.d/max_num_to_warn.xml b/tests/config/config.d/max_num_to_warn.xml index 776c270823d..1f55e6fd674 100644 --- a/tests/config/config.d/max_num_to_warn.xml +++ b/tests/config/config.d/max_num_to_warn.xml @@ -1,5 +1,7 @@ 5 + 5 + 5 2 10 diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 0e6cd4b0e03..7a9b579c00a 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -92,6 +92,13 @@ 22548578304 100 + + s3 + http://localhost:11111/test/special/ + clickhouse + clickhouse + 0 + @@ -107,6 +114,13 @@ + + +
+ s3_no_cache +
+
+
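The two storage_conf.xml hunks above lost their XML markup in this copy of the patch; only the element text survives (an S3 disk pointed at http://localhost:11111/test/special/ with the clickhouse/clickhouse credentials, plus a policy named s3_no_cache). A hedged sketch of the usual shape of such a policy stanza, written as a Python string in the style the tests use for generated configs; the tag names and the disk name are assumptions, not recovered from the patch:

    # Assumed reconstruction only; check the real tests/config/config.d/storage_conf.xml.
    S3_NO_CACHE_POLICY_SKETCH = """
    <policies>
        <s3_no_cache>
            <volumes>
                <main>
                    <disk>s3_disk_no_cache</disk>  <!-- hypothetical disk name -->
                </main>
            </volumes>
        </s3_no_cache>
    </policies>
    """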
diff --git a/tests/config/install.sh b/tests/config/install.sh index 33dcac9d2c7..6536683b6c2 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -181,11 +181,8 @@ elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then ln -sf $SRC_PATH/config.d/azure_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ fi -ARM="aarch64" -OS="$(uname -m)" if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then - echo "$OS" - if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$OS" == "$ARM" ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then echo "Azure configuration will not be added" else echo "Adding azure configuration" diff --git a/tests/fuzz/dictionaries/datatypes.dict b/tests/fuzz/dictionaries/datatypes.dict index 232e89db0c0..a01a94fd3e3 100644 --- a/tests/fuzz/dictionaries/datatypes.dict +++ b/tests/fuzz/dictionaries/datatypes.dict @@ -132,3 +132,4 @@ "YEAR" "bool" "boolean" +"Dynamic" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 693e41253cb..41c162217d2 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -513,6 +513,7 @@ class ClickHouseCluster: self.minio_redirect_host = "proxy1" self.minio_redirect_ip = None self.minio_redirect_port = 8080 + self.minio_docker_id = self.get_instance_docker_id(self.minio_host) self.spark_session = None @@ -4294,6 +4295,9 @@ class ClickHouseInstance: ) return xml_str + def get_machine_name(self): + return platform.machine() + @property def odbc_drivers(self): if self.odbc_ini_path: @@ -4301,12 +4305,12 @@ class ClickHouseInstance: "SQLite3": { "DSN": "sqlite3_odbc", "Database": "/tmp/sqliteodbc", - "Driver": "/usr/lib/x86_64-linux-gnu/odbc/libsqlite3odbc.so", - "Setup": "/usr/lib/x86_64-linux-gnu/odbc/libsqlite3odbc.so", + "Driver": f"/usr/lib/{self.get_machine_name()}-linux-gnu/odbc/libsqlite3odbc.so", + "Setup": f"/usr/lib/{self.get_machine_name()}-linux-gnu/odbc/libsqlite3odbc.so", }, "MySQL": { "DSN": "mysql_odbc", - "Driver": "/usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so", + "Driver": f"/usr/lib/{self.get_machine_name()}-linux-gnu/odbc/libmyodbc.so", "Database": odbc_mysql_db, "Uid": odbc_mysql_uid, "Pwd": odbc_mysql_pass, @@ -4323,8 +4327,8 @@ class ClickHouseInstance: "ReadOnly": "No", "RowVersioning": "No", "ShowSystemTables": "No", - "Driver": "/usr/lib/x86_64-linux-gnu/odbc/psqlodbca.so", - "Setup": "/usr/lib/x86_64-linux-gnu/odbc/libodbcpsqlS.so", + "Driver": f"/usr/lib/{self.get_machine_name()}-linux-gnu/odbc/psqlodbca.so", + "Setup": f"/usr/lib/{self.get_machine_name()}-linux-gnu/odbc/libodbcpsqlS.so", "ConnSettings": "", }, } diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 206f960293f..686abc76bdf 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -165,11 +165,50 @@ class _ServerRuntime: '' "" "ExpectedError" - "mock s3 injected error" + "mock s3 injected unretryable error" "txfbd566d03042474888193-00608d7537" "" ) - request_handler.write_error(data) + request_handler.write_error(500, data) + + class SlowDownAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "SlowDown" + "Slow Down." 
+ "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) + + # make sure that Alibaba errors (QpsLimitExceeded, TotalQpsLimitExceededAction) are retriable + # we patched contrib/aws to achive it: https://github.com/ClickHouse/aws-sdk-cpp/pull/22 https://github.com/ClickHouse/aws-sdk-cpp/pull/23 + # https://www.alibabacloud.com/help/en/oss/support/http-status-code-503 + class QpsLimitExceededAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "QpsLimitExceeded" + "Please reduce your request rate." + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) + + class TotalQpsLimitExceededAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "TotalQpsLimitExceeded" + "Please reduce your request rate." + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) class RedirectAction: def __init__(self, host="localhost", port=1): @@ -239,6 +278,16 @@ class _ServerRuntime: self.error_handler = _ServerRuntime.BrokenPipeAction() elif self.action == "redirect_to": self.error_handler = _ServerRuntime.RedirectAction(*self.action_args) + elif self.action == "slow_down": + self.error_handler = _ServerRuntime.SlowDownAction(*self.action_args) + elif self.action == "qps_limit_exceeded": + self.error_handler = _ServerRuntime.QpsLimitExceededAction( + *self.action_args + ) + elif self.action == "total_qps_limit_exceeded": + self.error_handler = _ServerRuntime.TotalQpsLimitExceededAction( + *self.action_args + ) else: self.error_handler = _ServerRuntime.Expected500ErrorAction() @@ -344,12 +393,12 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.end_headers() self.wfile.write(b"Redirected") - def write_error(self, data, content_length=None): + def write_error(self, http_code, data, content_length=None): if content_length is None: content_length = len(data) self.log_message("write_error %s", data) self.read_all_input() - self.send_response(500) + self.send_response(http_code) self.send_header("Content-Type", "text/xml") self.send_header("Content-Length", str(content_length)) self.end_headers() @@ -418,7 +467,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): path = [x for x in parts.path.split("/") if x] assert path[0] == "mock_settings", path if len(path) < 2: - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") if path[1] == "at_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) @@ -477,7 +526,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.log_message("reset") return self._ok() - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") def do_GET(self): if self.path == "/": diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py index 9059fda08ae..c67d00769c5 100644 --- a/tests/integration/helpers/s3_url_proxy_tests_util.py +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -2,21 +2,35 @@ import os import time +ALL_HTTP_METHODS = {"POST", "PUT", "GET", "HEAD", "CONNECT"} + + def check_proxy_logs( - cluster, proxy_instance, protocol, bucket, http_methods={"POST", "PUT", "GET"} + cluster, proxy_instances, protocol, bucket, requested_http_methods ): for i in range(10): - logs = cluster.get_container_logs(proxy_instance) # Check with retry that all possible interactions with Minio are present - 
for http_method in http_methods: - if ( - logs.find(http_method + f" {protocol}://minio1:9001/root/data/{bucket}") - >= 0 - ): - return + for http_method in ALL_HTTP_METHODS: + for proxy_instance in proxy_instances: + logs = cluster.get_container_logs(proxy_instance) + if ( + logs.find( + http_method + f" {protocol}://minio1:9001/root/data/{bucket}" + ) + >= 0 + ): + if http_method not in requested_http_methods: + assert ( + False + ), f"Found http method {http_method} for bucket {bucket} that should not be found in {proxy_instance} logs" + break + else: + if http_method in requested_http_methods: + assert ( + False + ), f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}" + time.sleep(1) - else: - assert False, f"{http_methods} method not found in logs of {proxy_instance}" def wait_resolver(cluster): @@ -33,8 +47,8 @@ def wait_resolver(cluster): if response == "proxy1" or response == "proxy2": return time.sleep(i) - else: - assert False, "Resolver is not up" + + assert False, "Resolver is not up" # Runs simple proxy resolver in python env container. @@ -80,9 +94,33 @@ def perform_simple_queries(node, minio_endpoint): def simple_test(cluster, proxies, protocol, bucket): minio_endpoint = build_s3_endpoint(protocol, bucket) - node = cluster.instances[f"{bucket}"] + node = cluster.instances[bucket] perform_simple_queries(node, minio_endpoint) - for proxy in proxies: - check_proxy_logs(cluster, proxy, protocol, bucket) + check_proxy_logs(cluster, proxies, protocol, bucket, ["PUT", "GET", "HEAD"]) + + +def simple_storage_test(cluster, node, proxies, policy): + node.query( + """ + CREATE TABLE s3_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format( + policy + ) + ) + node.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") + assert ( + node.query("SELECT * FROM s3_test order by id FORMAT Values") + == "(0,'data'),(1,'data')" + ) + + node.query("DROP TABLE IF EXISTS s3_test SYNC") + + # not checking for POST because it is in a different format + check_proxy_logs(cluster, proxies, "http", policy, ["PUT", "GET"]) diff --git a/tests/integration/test_access_for_functions/test.py b/tests/integration/test_access_for_functions/test.py index 3e58c961421..004d39e1dea 100644 --- a/tests/integration/test_access_for_functions/test.py +++ b/tests/integration/test_access_for_functions/test.py @@ -42,7 +42,7 @@ def test_access_rights_for_function(): function_resolution_error = instance.query_and_get_error("SELECT MySum(1, 2)") assert ( "Unknown function MySum" in function_resolution_error - or "Function with name 'MySum' does not exists." in function_resolution_error + or "Function with name 'MySum' does not exist." 
in function_resolution_error ) instance.query("REVOKE CREATE FUNCTION ON *.* FROM A") diff --git a/tests/queries/0_stateless/00694_max_block_size_zero.reference b/tests/integration/test_attach_partition_using_copy/__init__.py similarity index 100% rename from tests/queries/0_stateless/00694_max_block_size_zero.reference rename to tests/integration/test_attach_partition_using_copy/__init__.py diff --git a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml b/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml new file mode 100644 index 00000000000..b40730e9f7d --- /dev/null +++ b/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + replica1 + 9000 + + + replica2 + 9000 + + + + + diff --git a/tests/integration/test_attach_partition_using_copy/test.py b/tests/integration/test_attach_partition_using_copy/test.py new file mode 100644 index 00000000000..e7163b1eb32 --- /dev/null +++ b/tests/integration/test_attach_partition_using_copy/test.py @@ -0,0 +1,201 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +replica1 = cluster.add_instance( + "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) +replica2 = cluster.add_instance( + "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + except Exception as ex: + print(ex) + finally: + cluster.shutdown() + + +def cleanup(nodes): + for node in nodes: + node.query("DROP TABLE IF EXISTS source SYNC") + node.query("DROP TABLE IF EXISTS destination SYNC") + + +def create_source_table(node, table_name, replicated): + replica = node.name + engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" + if replicated + else "MergeTree()" + ) + node.query_with_retry( + """ + ATTACH TABLE {table_name} UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' + ( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) + ) + ENGINE = {engine} + ORDER BY (postcode1, postcode2, addr1, addr2) + SETTINGS disk = disk(type = web, endpoint = 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/') + """.format( + table_name=table_name, engine=engine + ) + ) + + +def create_destination_table(node, table_name, replicated): + replica = node.name + engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" + if replicated + else "MergeTree()" + ) + node.query_with_retry( + """ + CREATE TABLE {table_name} + ( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), 
+ county LowCardinality(String) + ) + ENGINE = {engine} + ORDER BY (postcode1, postcode2, addr1, addr2) + """.format( + table_name=table_name, engine=engine + ) + ) + + +def test_both_mergtree(start_cluster): + create_source_table(replica1, "source", False) + create_destination_table(replica1, "destination", False) + + replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" + ), + ) + + assert_eq_with_retry( + replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" + ) + + cleanup([replica1]) + + +def test_all_replicated(start_cluster): + create_source_table(replica1, "source", True) + create_destination_table(replica1, "destination", True) + create_destination_table(replica2, "destination", True) + + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" + ), + ) + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", + replica2.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" + ), + ) + + assert_eq_with_retry( + replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" + ) + + assert_eq_with_retry( + replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" + ) + + cleanup([replica1, replica2]) + + +def test_only_destination_replicated(start_cluster): + create_source_table(replica1, "source", False) + create_destination_table(replica1, "destination", True) + create_destination_table(replica2, "destination", True) + + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" + ), + ) + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", + replica2.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" + ), + ) + + assert_eq_with_retry( + replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" + ) + + assert_eq_with_retry( + replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" + ) + + cleanup([replica1, replica2]) + + +def test_not_work_on_different_disk(start_cluster): + # Replace and move should not work on replace + create_source_table(replica1, "source", False) + create_destination_table(replica2, "destination", 
False) + + replica1.query_and_get_error( + f"ALTER TABLE destination REPLACE PARTITION tuple() FROM source" + ) + replica1.query_and_get_error( + f"ALTER TABLE destination MOVE PARTITION tuple() FROM source" + ) + cleanup([replica1, replica2]) diff --git a/tests/integration/test_azure_blob_storage_native_copy/__init__.py b/tests/integration/test_azure_blob_storage_native_copy/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_azure_blob_storage_native_copy/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py b/tests/integration/test_azure_blob_storage_native_copy/test.py new file mode 100644 index 00000000000..77d400240b1 --- /dev/null +++ b/tests/integration/test_azure_blob_storage_native_copy/test.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 + +import gzip +import json +import logging +import os +import io +import random +import threading +import time + +from azure.storage.blob import BlobServiceClient +import helpers.client +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.network import PartitionManager +from helpers.mock_servers import start_mock_servers +from helpers.test_tools import exec_query_with_retry + + +def generate_config(port): + path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "./_gen/storage_conf.xml", + ) + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w") as f: + TEMPLATE = """ + + + + + local + object_storage + azure_blob_storage + http://azurite1:{port}/devstoreaccount1/ + cont + false + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + true + + + local + object_storage + azure_blob_storage + true + http://azurite1:{port}/devstoreaccount1/ + othercontainer + false + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + + + cache + disk_azure + /tmp/azure_cache/ + 1000000000 + 1 + + + + + +
+ disk_azure +
+
+
+ + +
+ disk_azure_other_bucket +
+
+
+ + +
+ disk_azure_cache +
+
+
+
+
+ + disk_azure + disk_azure_cache + disk_azure_other_bucket + +
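The policies part of the TEMPLATE above lost its markup as well; what survives are the disk names and, further down, the storage_policy values policy_azure_cache and policy_azure_other_bucket used by the tests (a third policy backed by disk_azure is also present, but its name is not visible here). A hedged sketch of the likely block, with the tag nesting assumed and the trailing list of disks presumed to be the backup-allowed disks:

    # Assumed shape; names come from the surviving text and the tests below.
    AZURE_POLICIES_SKETCH = """
    <policies>
        <policy_azure_other_bucket>
            <volumes><main><disk>disk_azure_other_bucket</disk></main></volumes>
        </policy_azure_other_bucket>
        <policy_azure_cache>
            <volumes><main><disk>disk_azure_cache</disk></main></volumes>
        </policy_azure_cache>
    </policies>
    <backups>
        <allowed_disk>disk_azure</allowed_disk>
        <allowed_disk>disk_azure_cache</allowed_disk>
        <allowed_disk>disk_azure_other_bucket</allowed_disk>
    </backups>
    """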
+ """ + f.write(TEMPLATE.format(port=port)) + return path + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + port = cluster.azurite_port + path = generate_config(port) + cluster.add_instance( + "node1", + main_configs=[path], + with_azurite=True, + ) + cluster.add_instance( + "node2", + main_configs=[path], + with_azurite=True, + ) + cluster.add_instance( + "node3", + main_configs=[path], + with_azurite=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def azure_query( + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None +): + for i in range(try_num): + try: + if expect_error: + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) + except Exception as ex: + retriable_errors = [ + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Error while polling for socket ready read", + ] + retry = False + for error in retriable_errors: + if error in str(ex): + retry = True + print(f"Try num: {i}. 
Having retriable error: {ex}") + time.sleep(i) + break + if not retry or i == try_num - 1: + raise Exception(ex) + if query_on_retry is not None: + node.query(query_on_retry) + continue + + +def test_backup_restore_on_merge_tree_same_container(cluster): + node1 = cluster.instances["node1"] + azure_query( + node1, + f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_cache'", + ) + azure_query(node1, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')") + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_backup')" + print("BACKUP DEST", backup_destination) + azure_query( + node1, + f"BACKUP TABLE test_simple_merge_tree TO {backup_destination}", + ) + + assert node1.contains_in_log("using native copy") + + azure_query( + node1, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", + ) + assert ( + azure_query(node1, f"SELECT * from test_simple_merge_tree_restored") == "1\ta\n" + ) + + assert node1.contains_in_log("using native copy") + + azure_query(node1, f"DROP TABLE test_simple_merge_tree") + azure_query(node1, f"DROP TABLE test_simple_merge_tree_restored") + + +def test_backup_restore_on_merge_tree_different_container(cluster): + node2 = cluster.instances["node2"] + azure_query( + node2, + f"CREATE TABLE test_simple_merge_tree_different_bucket(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_other_bucket'", + ) + azure_query( + node2, f"INSERT INTO test_simple_merge_tree_different_bucket VALUES (1, 'a')" + ) + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_different_bucket_backup_different_bucket')" + print("BACKUP DEST", backup_destination) + azure_query( + node2, + f"BACKUP TABLE test_simple_merge_tree_different_bucket TO {backup_destination}", + ) + + assert node2.contains_in_log("using native copy") + + azure_query( + node2, + f"RESTORE TABLE test_simple_merge_tree_different_bucket AS test_simple_merge_tree_different_bucket_restored FROM {backup_destination};", + ) + assert ( + azure_query( + node2, f"SELECT * from test_simple_merge_tree_different_bucket_restored" + ) + == "1\ta\n" + ) + + assert node2.contains_in_log("using native copy") + + azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket") + azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket_restored") + + +def test_backup_restore_on_merge_tree_native_copy_async(cluster): + node3 = cluster.instances["node3"] + azure_query( + node3, + f"CREATE TABLE test_simple_merge_tree_async(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_cache'", + ) + azure_query(node3, f"INSERT INTO test_simple_merge_tree_async VALUES (1, 'a')") + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_async_backup')" + print("BACKUP DEST", backup_destination) + azure_query( + node3, + f"BACKUP TABLE test_simple_merge_tree_async TO {backup_destination}", + settings={"azure_max_single_part_copy_size": 0}, + ) + + assert node3.contains_in_log("using native copy") + + azure_query( + node3, + f"RESTORE TABLE test_simple_merge_tree_async AS test_simple_merge_tree_async_restored FROM {backup_destination};", + 
settings={"azure_max_single_part_copy_size": 0}, + ) + assert ( + azure_query(node3, f"SELECT * from test_simple_merge_tree_async_restored") + == "1\ta\n" + ) + + assert node3.contains_in_log("using native copy") + + azure_query(node3, f"DROP TABLE test_simple_merge_tree_async") + azure_query(node3, f"DROP TABLE test_simple_merge_tree_async_restored") diff --git a/tests/integration/test_azure_blob_storage_plain_rewritable/__init__.py b/tests/integration/test_azure_blob_storage_plain_rewritable/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_azure_blob_storage_plain_rewritable/test.py b/tests/integration/test_azure_blob_storage_plain_rewritable/test.py new file mode 100644 index 00000000000..96d116ec6a2 --- /dev/null +++ b/tests/integration/test_azure_blob_storage_plain_rewritable/test.py @@ -0,0 +1,153 @@ +import logging +import os +import random +import string + +import pytest + +from helpers.cluster import ClickHouseCluster +from azure.storage.blob import BlobServiceClient +from test_storage_azure_blob_storage.test import azure_query + +NODE_NAME = "node" + + +def generate_cluster_def(port): + path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "./_gen/disk_storage_conf.xml", + ) + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w") as f: + f.write( + f""" + + + + object_storage + azure_blob_storage + plain_rewritable + http://azurite1:{port}/devstoreaccount1 + cont + true + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + 100000 + 100000 + 10 + 10 + + + + + +
+ blob_storage_disk +
+
+
+
+
+
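The policy section of this generated config is in the same state: the markup is gone, but the disk name blob_storage_disk survives above and the tests below use storage_policy='blob_storage_policy', which implies, as an assumption, roughly:

    # Assumed shape only; verify against the template in the repository.
    PLAIN_REWRITABLE_POLICY_SKETCH = (
        "<policies><blob_storage_policy><volumes><main>"
        "<disk>blob_storage_disk</disk>"
        "</main></volumes></blob_storage_policy></policies>"
    )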
+""" + ) + return path + + +insert_values = [ + "(0,'data'),(1,'data')", + ",".join( + f"({i},'{''.join(random.choices(string.ascii_lowercase, k=5))}')" + for i in range(10) + ), +] + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + port = cluster.azurite_port + path = generate_cluster_def(port) + cluster.add_instance( + NODE_NAME, + main_configs=[ + path, + ], + with_azurite=True, + stay_alive=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_insert_select(cluster): + node = cluster.instances[NODE_NAME] + + for index, value in enumerate(insert_values): + azure_query( + node, + """ + CREATE TABLE test_{} ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='blob_storage_policy' + """.format( + index + ), + ) + + azure_query(node, "INSERT INTO test_{} VALUES {}".format(index, value)) + assert ( + azure_query( + node, "SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index) + ) + == value + ) + + +def test_restart_server(cluster): + node = cluster.instances[NODE_NAME] + + for index, value in enumerate(insert_values): + assert ( + azure_query( + node, "SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index) + ) + == value + ) + node.restart_clickhouse() + + for index, value in enumerate(insert_values): + assert ( + azure_query( + node, "SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index) + ) + == value + ) + + +def test_drop_table(cluster): + node = cluster.instances[NODE_NAME] + + for index, value in enumerate(insert_values): + node.query("DROP TABLE IF EXISTS test_{} SYNC".format(index)) + + port = cluster.env_variables["AZURITE_PORT"] + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + containers = blob_service_client.list_containers() + for container in containers: + container_client = blob_service_client.get_container_client(container) + assert len(list(container_client.list_blobs())) == 0 diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 1a1458cb68e..78b186e3227 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -281,7 +281,10 @@ def test_backup_restore_on_merge_tree(cluster): node = cluster.instances["node"] azure_query( node, - f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy'", + f""" + DROP TABLE IF EXISTS test_simple_merge_tree; + CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy' + """, ) azure_query(node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')") @@ -299,3 +302,85 @@ def test_backup_restore_on_merge_tree(cluster): ) azure_query(node, f"DROP TABLE test_simple_merge_tree") azure_query(node, f"DROP TABLE test_simple_merge_tree_restored") + + +def test_backup_restore_correct_block_ids(cluster): + node = cluster.instances["node"] + azure_query( + node, + f""" + DROP TABLE IF 
EXISTS test_simple_merge_tree; + CREATE TABLE test_simple_merge_tree(key UInt64, data String) + Engine = MergeTree() + ORDER BY tuple() + SETTINGS storage_policy='blob_storage_policy'""", + ) + data_query = "SELECT number, repeat('a', 100) FROM numbers(1000)" + azure_query( + node, + f"INSERT INTO test_simple_merge_tree {data_query}", + ) + + for min_upload_size, max_upload_size, max_blocks, expected_block_size in [ + (42, 100, 1000, 42), + (42, 52, 86, 52), + ]: + data_path = f"test_backup_correct_block_ids_{max_blocks}" + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{data_path}')" + azure_query( + node, + f""" + SET azure_min_upload_part_size = {min_upload_size}; + SET azure_max_upload_part_size = {max_upload_size}; + SET azure_max_blocks_in_multipart_upload = {max_blocks}; + BACKUP TABLE test_simple_merge_tree TO {backup_destination} SETTINGS allow_azure_native_copy = 0; + """, + ) + + port = cluster.azurite_port + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + container_name = "cont" + blob_service_client = BlobServiceClient.from_connection_string( + connection_string + ) + container_client = blob_service_client.get_container_client(container_name) + blobs = container_client.list_blobs() + + data_blob = ( + f"{data_path}/data/default/test_simple_merge_tree/all_1_1_0/data.bin" + ) + found = False + for blob in blobs: + if data_blob == blob.get("name"): + found = True + break + assert found + + blob_client = blob_service_client.get_blob_client( + blob=data_blob, container=container_name + ) + + blocks_num = len(blob_client.get_block_list()[0]) + assert blocks_num > 50 + + count = 0 + for block in blob_client.get_block_list()[0]: + count += 1 + if count < blocks_num: + assert block.get("size") == expected_block_size + else: + assert block.get("size") < expected_block_size + + azure_query( + node, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored_{max_blocks} FROM {backup_destination};", + ) + assert azure_query( + node, + f"SELECT * from test_simple_merge_tree_restored_{max_blocks} ORDER BY key", + ) == node.query(data_query) diff --git a/tests/integration/test_backup_restore_on_cluster/configs/cluster_2x2.xml b/tests/integration/test_backup_restore_on_cluster/configs/cluster_2x2.xml new file mode 100644 index 00000000000..97e60fbbed7 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/configs/cluster_2x2.xml @@ -0,0 +1,26 @@ + + + + + + node_1_1 + 9000 + + + node_1_2 + 9000 + + + + + node_2_1 + 9000 + + + node_2_2 + 9000 + + + + + diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index d1520444df1..700ed6f15f5 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -41,7 +41,6 @@ node2 = cluster.add_instance( stay_alive=True, # Necessary for the "test_stop_other_host_while_backup" test ) - node3 = cluster.add_instance( "node3", main_configs=main_configs, diff --git a/tests/integration/test_backup_restore_on_cluster/test_two_shards_two_replicas.py b/tests/integration/test_backup_restore_on_cluster/test_two_shards_two_replicas.py new file mode 100644 index 00000000000..c0e318c8bb7 --- /dev/null +++ 
b/tests/integration/test_backup_restore_on_cluster/test_two_shards_two_replicas.py @@ -0,0 +1,153 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + "configs/cluster_2x2.xml", + "configs/lesser_timeouts.xml", # Default timeouts are quite big (a few minutes), the tests don't need them to be that big. +] + +user_configs = [ + "configs/zookeeper_retries.xml", +] + +node_1_1 = cluster.add_instance( + "node_1_1", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "1", "shard": "1"}, + with_zookeeper=True, +) + +node_1_2 = cluster.add_instance( + "node_1_2", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "2", "shard": "1"}, + with_zookeeper=True, +) + +node_2_1 = cluster.add_instance( + "node_2_1", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "1", "shard": "2"}, + with_zookeeper=True, +) + +node_2_2 = cluster.add_instance( + "node_2_2", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "2", "shard": "2"}, + with_zookeeper=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node_1_1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster_2x2' SYNC") + node_1_1.query("DROP TABLE IF EXISTS table_a ON CLUSTER 'cluster_2x2' SYNC") + node_1_1.query("DROP TABLE IF EXISTS table_b ON CLUSTER 'cluster_2x2' SYNC") + + +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"Disk('backups', '{backup_id_counter}')" + + +def test_replicated_table(): + node_1_1.query( + "CREATE TABLE tbl ON CLUSTER 'cluster_2x2' (" + "x Int64" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/{shard}', '{replica}')" + "ORDER BY x" + ) + + node_1_1.query("INSERT INTO tbl VALUES (100), (200)") + node_2_1.query("INSERT INTO tbl VALUES (300), (400)") + + backup_name = new_backup_name() + + node_1_1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster_2x2' TO {backup_name}") + + node_1_1.query(f"DROP TABLE tbl ON CLUSTER 'cluster_2x2' SYNC") + + node_1_1.query(f"RESTORE ALL ON CLUSTER 'cluster_2x2' FROM {backup_name}") + + node_1_1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster_2x2' tbl") + + assert node_1_1.query("SELECT * FROM tbl ORDER BY x") == TSV([[100], [200]]) + assert node_1_2.query("SELECT * FROM tbl ORDER BY x") == TSV([[100], [200]]) + assert node_2_1.query("SELECT * FROM tbl ORDER BY x") == TSV([[300], [400]]) + assert node_2_2.query("SELECT * FROM tbl ORDER BY x") == TSV([[300], [400]]) + + +def test_two_tables_with_uuid_in_zk_path(): + node_1_1.query( + "CREATE TABLE table_a ON CLUSTER 'cluster_2x2' (" + "x Int64" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')" + "ORDER BY x" + ) + + node_1_1.query( + "CREATE TABLE table_b ON CLUSTER 'cluster_2x2' (" + "x Int64" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')" + "ORDER BY x" + ) + + node_1_1.query("INSERT INTO table_a VALUES (100), (200)") + node_2_1.query("INSERT INTO table_a VALUES (300), (400)") + + node_1_2.query("INSERT INTO table_b VALUES (500), (600)") + 
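An aside on what this test case exercises: because the ZooKeeper path template contains {uuid}, table_a and table_b expand to different replication paths even though their ENGINE clauses are textually identical, and RESTORE has to re-map those paths onto the newly created tables. A hypothetical illustration (the UUIDs are invented):

    template = "/clickhouse/tables/{uuid}/{shard}"
    # Each table substitutes its own UUID, so the paths never collide.
    path_a = template.format(uuid="11111111-aaaa-4bbb-8ccc-000000000001", shard="1")  # table_a
    path_b = template.format(uuid="22222222-bbbb-4ccc-8ddd-000000000002", shard="1")  # table_b
    assert path_a != path_b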
node_2_2.query("INSERT INTO table_b VALUES (700), (800)") + + backup_name = new_backup_name() + + node_1_1.query( + f"BACKUP TABLE table_a, TABLE table_b ON CLUSTER 'cluster_2x2' TO {backup_name}" + ) + + node_1_1.query(f"DROP TABLE table_a ON CLUSTER 'cluster_2x2' SYNC") + node_1_1.query(f"DROP TABLE table_b ON CLUSTER 'cluster_2x2' SYNC") + + node_1_1.query(f"RESTORE ALL ON CLUSTER 'cluster_2x2' FROM {backup_name}") + + node_1_1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster_2x2' table_a") + node_1_1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster_2x2' table_b") + + assert node_1_1.query("SELECT * FROM table_a ORDER BY x") == TSV([[100], [200]]) + assert node_1_2.query("SELECT * FROM table_a ORDER BY x") == TSV([[100], [200]]) + assert node_2_1.query("SELECT * FROM table_a ORDER BY x") == TSV([[300], [400]]) + assert node_2_2.query("SELECT * FROM table_a ORDER BY x") == TSV([[300], [400]]) + + assert node_1_1.query("SELECT * FROM table_b ORDER BY x") == TSV([[500], [600]]) + assert node_1_2.query("SELECT * FROM table_b ORDER BY x") == TSV([[500], [600]]) + assert node_2_1.query("SELECT * FROM table_b ORDER BY x") == TSV([[700], [800]]) + assert node_2_2.query("SELECT * FROM table_b ORDER BY x") == TSV([[700], [800]]) diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml new file mode 100644 index 00000000000..323e986f966 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml @@ -0,0 +1,22 @@ + + + + + + s3 + http://minio1:9001/root/data/disks/disk_s3_restricted_user/ + miniorestricted1 + minio123 + + + + + +
+ disk_s3_restricted_user +
+
+
+
+
+
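The markup of this new disk_s3_restricted_user.xml config was stripped here too. From the surviving text (an s3 disk at http://minio1:9001/root/data/disks/disk_s3_restricted_user/ with the miniorestricted1/minio123 credentials) and the policy_s3_restricted name referenced by the test below, the file plausibly defines, again with nesting and tag names assumed:

    # Assumed reconstruction of the stripped XML; not recovered verbatim.
    DISK_S3_RESTRICTED_SKETCH = """
    <storage_configuration>
        <disks>
            <disk_s3_restricted_user>
                <type>s3</type>
                <endpoint>http://minio1:9001/root/data/disks/disk_s3_restricted_user/</endpoint>
                <access_key_id>miniorestricted1</access_key_id>
                <secret_access_key>minio123</secret_access_key>
            </disk_s3_restricted_user>
        </disks>
        <policies>
            <policy_s3_restricted>
                <volumes><main><disk>disk_s3_restricted_user</disk></main></volumes>
            </policy_s3_restricted>
        </policies>
    </storage_configuration>
    """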
diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 05424887736..967ed6a221c 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -3,8 +3,11 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV import uuid +import os +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", @@ -20,13 +23,127 @@ node = cluster.add_instance( ], with_minio=True, with_zookeeper=True, + stay_alive=True, ) +def setup_minio_users(): + # create 2 extra users with restricted access + # miniorestricted1 - full access to bucket 'root', no access to other buckets + # miniorestricted2 - full access to bucket 'root2', no access to other buckets + # storage policy 'policy_s3_restricted' defines a policy for storing files inside bucket 'root' using 'miniorestricted1' user + for user, bucket in [("miniorestricted1", "root"), ("miniorestricted2", "root2")]: + print( + cluster.exec_in_container( + cluster.minio_docker_id, + [ + "mc", + "alias", + "set", + "root", + "http://minio1:9001", + "minio", + "minio123", + ], + ) + ) + policy = f""" +{{ + "Version": "2012-10-17", + "Statement": [ + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:GetBucketLocation", + "s3:ListBucket", + "s3:ListBucketMultipartUploads" + ], + "Resource": [ + "arn:aws:s3:::{bucket}" + ] + }}, + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:AbortMultipartUpload", + "s3:DeleteObject", + "s3:GetObject", + "s3:ListMultipartUploadParts", + "s3:PutObject" + ], + "Resource": [ + "arn:aws:s3:::{bucket}/*" + ] + }} + ] +}}""" + + cluster.exec_in_container( + cluster.minio_docker_id, + ["bash", "-c", f"cat >/tmp/{bucket}_policy.json < 1000000 1 + 0 diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml index 95a313ea4f2..c1ca258f6c4 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml @@ -5,6 +5,7 @@ 5 0 + 0 diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 22d6d263d23..476f7c61b28 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -91,7 +91,7 @@ def get_multipart_counters(node, query_id, log_type="ExceptionWhileProcessing"): SELECT ProfileEvents['S3CreateMultipartUpload'], ProfileEvents['S3UploadPart'], - ProfileEvents['S3WriteRequestsErrors'], + ProfileEvents['S3WriteRequestsErrors'] + ProfileEvents['S3WriteRequestsThrottling'], FROM system.query_log WHERE query_id='{query_id}' AND type='{log_type}' @@ -148,7 +148,7 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -190,7 +190,7 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 
injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -200,18 +200,33 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( assert s3_errors >= 2 -def test_when_s3_connection_refused_is_retried(cluster, broken_s3): +@pytest.mark.parametrize( + "action_and_message", + [ + ("slow_down", "DB::Exception: Slow Down."), + ("qps_limit_exceeded", "DB::Exception: Please reduce your request rate."), + ("total_qps_limit_exceeded", "DB::Exception: Please reduce your request rate."), + ( + "connection_refused", + "Poco::Exception. Code: 1000, e.code() = 111, Connection refused", + ), + ], + ids=lambda x: x[0], +) +def test_when_error_is_retried(cluster, broken_s3, action_and_message): node = cluster.instances["node"] - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload(count=3, after=2, action="connection_refused") + action, message = action_and_message - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED" + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload(count=3, after=2, action=action) + + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED" node.query( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -234,13 +249,13 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): assert upload_parts == 39 assert s3_errors == 3 - broken_s3.setup_at_part_upload(count=1000, after=2, action="connection_refused") - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED_1" + broken_s3.setup_at_part_upload(count=1000, after=2, action=action) + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED_1" error = node.query_and_get_error( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -257,8 +272,78 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): ) assert "Code: 499" in error, error + assert message in error, error + + +def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="broken_pipe", + ) + + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + create_multipart, upload_parts, s3_errors = get_multipart_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert create_multipart == 1 + assert upload_parts == 7 + assert s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="broken_pipe", + ) + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + 
) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error assert ( - "Poco::Exception. Code: 1000, e.code() = 111, Connection refused" in error + "DB::Exception: Poco::Exception. Code: 1000, e.code() = 32, I/O error: Broken pipe" + in error ), error @@ -401,20 +486,20 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ) error = node.query_and_get_error( f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=100, - s3_check_objects_after_upload=0 + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 """, query_id=insert_query_id, ) @@ -427,78 +512,6 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ), error -def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): - node = cluster.instances["node"] - - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload( - count=3, - after=2, - action="broken_pipe", - ) - - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" - node.query( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - create_multipart, upload_parts, s3_errors = get_multipart_counters( - node, insert_query_id, log_type="QueryFinish" - ) - - assert create_multipart == 1 - assert upload_parts == 7 - assert s3_errors == 3 - - broken_s3.setup_at_part_upload( - count=1000, - after=2, - action="broken_pipe", - ) - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" - error = node.query_and_get_error( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - assert "Code: 1000" in error, error - assert ( - "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 32, I/O error: Broken pipe" - in error - ), error - - def test_query_is_canceled_with_inf_retries(cluster, broken_s3): node = cluster.instances["node_with_inf_s3_retries"] diff --git a/tests/integration/test_config_yaml_merge_keys/__init__.py b/tests/integration/test_config_yaml_merge_keys/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_config_yaml_merge_keys/configs/merge_keys.yml b/tests/integration/test_config_yaml_merge_keys/configs/merge_keys.yml new file mode 100644 index 00000000000..2fcceeb7ddb --- /dev/null +++ b/tests/integration/test_config_yaml_merge_keys/configs/merge_keys.yml @@ -0,0 +1,41 @@ +--- +settings_common: &settings_common + max_threads: 1 + max_final_threads: 1 + +settings_extended: &settings_extended + max_final_threads: 2 + +profiles: + profile_1: + <<: *settings_common + profile_2: + <<: *settings_common + max_threads: 4 + profile_3: + max_threads: 4 + <<: *settings_common + profile_4: + <<: *settings_common + <<: *settings_extended + max_threads: 4 + profile_5: + <<: [*settings_common, *settings_extended] + max_threads: 4 + +users: + user_1: + profile: profile_1 + password: "" + user_2: + profile: profile_2 + password: "" + user_3: + profile: profile_3 + password: "" + user_4: + profile: profile_4 + password: "" + user_5: + profile: profile_5 + password: "" diff --git a/tests/integration/test_config_yaml_merge_keys/test.py b/tests/integration/test_config_yaml_merge_keys/test.py new file mode 100644 index 00000000000..e7ee164f170 --- /dev/null +++ b/tests/integration/test_config_yaml_merge_keys/test.py @@ -0,0 +1,39 @@ +import helpers +import pytest +from helpers.cluster import ClickHouseCluster + + +def test_yaml_merge_keys_conf(): + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance("node", user_configs=["configs/merge_keys.yml"]) + + try: + cluster.start() + + # Assert simple merge key substitution + assert node.query("select getSetting('max_threads')", user="user_1") == "1\n" + + # Assert merge key overriden by regular key + assert node.query("select getSetting('max_threads')", user="user_2") == "4\n" + + # Assert normal key overriden by merge key + assert node.query("select getSetting('max_threads')", user="user_3") == "4\n" + + # Assert override with multiple merge keys + assert ( + node.query("select getSetting('max_final_threads')", user="user_4") == "2\n" + ) + + # Assert multiple merge key substitutions overriden by regular key + assert node.query("select getSetting('max_threads')", user="user_4") == "4\n" + + # Assert override with multiple merge keys for list syntax + assert ( + node.query("select getSetting('max_final_threads')", user="user_5") == "2\n" + ) + + # Assert multiple merge key substitutions overriden by regular key + # for list syntax + assert node.query("select getSetting('max_threads')", user="user_5") == "4\n" + finally: + cluster.shutdown() diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index 360456b2046..332f4ca11bb 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -76,7 +76,7 @@ def test_mysql_dictionaries_custom_query_full_load(started_cluster): query = instance.query query( - """ + f""" CREATE DICTIONARY test_dictionary_custom_query ( id UInt64, @@ -95,12 +95,46 @@ def test_mysql_dictionaries_custom_query_full_load(started_cluster): """ ) - result = query("SELECT id, value_1, value_2 FROM 
test_dictionary_custom_query") + result = query( + "SELECT dictGetString('test_dictionary_custom_query', 'value_1', toUInt64(1))" + ) + assert result == "Value_1\n" + result = query("SELECT id, value_1, value_2 FROM test_dictionary_custom_query") assert result == "1\tValue_1\tValue_2\n" query("DROP DICTIONARY test_dictionary_custom_query;") + query( + f""" + CREATE DICTIONARY test_cache_dictionary_custom_query + ( + id1 UInt64, + id2 UInt64, + value_concat String + ) + PRIMARY KEY id1, id2 + LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10)) + SOURCE(MYSQL( + HOST 'mysql80' + PORT 3306 + USER 'root' + PASSWORD 'clickhouse' + QUERY 'SELECT id AS id1, id + 1 AS id2, CONCAT_WS(" ", "The", value_1) AS value_concat FROM test.test_table_1')) + LIFETIME(0) + """ + ) + + result = query( + "SELECT dictGetString('test_cache_dictionary_custom_query', 'value_concat', (1, 2))" + ) + assert result == "The Value_1\n" + + result = query("SELECT id1, value_concat FROM test_cache_dictionary_custom_query") + assert result == "1\tThe Value_1\n" + + query("DROP DICTIONARY test_cache_dictionary_custom_query;") + execute_mysql_query(mysql_connection, "DROP TABLE test.test_table_1;") execute_mysql_query(mysql_connection, "DROP TABLE test.test_table_2;") diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index 3fe8286fa43..c003ff85755 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -254,6 +254,7 @@ def test_merge_tree_custom_disk_setting(start_cluster): ORDER BY tuple() SETTINGS disk = disk( + name='test_name', type=s3, endpoint='http://minio1:9001/root/data2/', access_key_id='minio', @@ -262,7 +263,7 @@ def test_merge_tree_custom_disk_setting(start_cluster): ) expected = """ - SETTINGS disk = disk(type = s3, endpoint = \\'[HIDDEN]\\', access_key_id = \\'[HIDDEN]\\', secret_access_key = \\'[HIDDEN]\\'), index_granularity = 8192 + SETTINGS disk = disk(name = \\'test_name\\', type = s3, endpoint = \\'[HIDDEN]\\', access_key_id = \\'[HIDDEN]\\', secret_access_key = \\'[HIDDEN]\\'), index_granularity = 8192 """ assert expected.strip() in node1.query(f"SHOW CREATE TABLE {TABLE_NAME}_4").strip() diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 8ddc1ff3c31..9f43ab73fa3 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -40,6 +40,12 @@ def cluster(): image="clickhouse/clickhouse-server", tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, ) + cluster.add_instance( + "node5", + main_configs=["configs/storage_conf.xml"], + with_nginx=True, + use_old_analyzer=True, + ) cluster.start() @@ -352,7 +358,6 @@ def test_page_cache(cluster): node.query("SYSTEM FLUSH LOGS") def get_profile_events(query_name): - print(f"asdqwe {query_name}") text = node.query( f"SELECT ProfileEvents.Names, ProfileEvents.Values FROM system.query_log ARRAY JOIN ProfileEvents WHERE query LIKE '% -- {query_name}' AND type = 'QueryFinish'" ) @@ -361,7 +366,6 @@ def test_page_cache(cluster): if line == "": continue name, value = line.split("\t") - print(f"asdqwe {name} = {int(value)}") res[name] = int(value) return res @@ -390,3 +394,21 @@ def test_page_cache(cluster): node.query("DROP TABLE test{} SYNC".format(i)) print(f"Ok {i}") + + +def test_config_reload(cluster): + node1 = cluster.instances["node5"] + table_name = "config_reload" + + global uuids + node1.query( + f""" + DROP TABLE IF 
EXISTS {table_name}; + CREATE TABLE {table_name} UUID '{uuids[0]}' + (id Int32) ENGINE = MergeTree() ORDER BY id + SETTINGS disk = disk(type=web, endpoint='http://nginx:80/test1/'); + """ + ) + + node1.query("SYSTEM RELOAD CONFIG") + node1.query(f"DROP TABLE {table_name} SYNC") diff --git a/tests/integration/test_disk_types/configs/storage.xml b/tests/integration/test_disk_types/configs/storage_amd.xml similarity index 100% rename from tests/integration/test_disk_types/configs/storage.xml rename to tests/integration/test_disk_types/configs/storage_amd.xml diff --git a/tests/integration/test_disk_types/configs/storage_arm.xml b/tests/integration/test_disk_types/configs/storage_arm.xml new file mode 100644 index 00000000000..a246cc8469e --- /dev/null +++ b/tests/integration/test_disk_types/configs/storage_arm.xml @@ -0,0 +1,17 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + encrypted + disk_s3 + 1234567812345678 + + + + diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 3c4169be4de..1cc5048eb69 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -19,7 +19,9 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/storage.xml"], + main_configs=["configs/storage_arm.xml"] + if is_arm() + else ["configs/storage_amd.xml"], with_minio=True, with_hdfs=not is_arm(), ) diff --git a/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml b/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml new file mode 100644 index 00000000000..a747d61a0dd --- /dev/null +++ b/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml @@ -0,0 +1,12 @@ + + + + + + ::/0 + + default + default + + + diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 10dbb23d961..50d7be4d11e 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -12,12 +12,16 @@ from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) -def make_instance(name, cfg, *args, **kwargs): +def make_instance(name, *args, **kwargs): + main_configs = kwargs.pop("main_configs", []) + main_configs.append("configs/remote_servers.xml") + user_configs = kwargs.pop("user_configs", []) + user_configs.append("configs/users.xml") return cluster.add_instance( name, with_zookeeper=True, - main_configs=["configs/remote_servers.xml", cfg], - user_configs=["configs/users.xml"], + main_configs=main_configs, + user_configs=user_configs, *args, **kwargs, ) @@ -27,11 +31,16 @@ def make_instance(name, cfg, *args, **kwargs): assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" # _n1/_n2 contains cluster with different -- should fail -n1 = make_instance("n1", "configs/remote_servers_n1.xml") -n2 = make_instance("n2", "configs/remote_servers_n2.xml") +# only n1 contains new_user +n1 = make_instance( + "n1", + main_configs=["configs/remote_servers_n1.xml"], + user_configs=["configs/users.d/new_user.xml"], +) +n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) backward = make_instance( "backward", - "configs/remote_servers_backward.xml", + main_configs=["configs/remote_servers_backward.xml"], image="clickhouse/clickhouse-server", # version without 
DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, @@ -100,6 +109,12 @@ def bootstrap(): ) """ ) + n.query( + """ + CREATE TABLE dist_over_dist_secure AS data + Engine=Distributed(secure, currentDatabase(), dist_secure, key) + """ + ) @pytest.fixture(scope="module", autouse=True) @@ -432,3 +447,20 @@ def test_user_secure_cluster_from_backward(user, password): assert n1.contains_in_log( "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." ) + + +def test_secure_cluster_distributed_over_distributed_different_users(): + # This works because we will have initial_user='default' + n1.query( + "SELECT * FROM remote('n1', currentDatabase(), dist_secure)", user="new_user" + ) + # While this is broken because now initial_user='new_user', and n2 does not has it + with pytest.raises(QueryRuntimeException): + n2.query( + "SELECT * FROM remote('n1', currentDatabase(), dist_secure, 'new_user')" + ) + # And this is still a problem, let's assume that this is OK, since we are + # expecting that in case of dist-over-dist the clusters are the same (users + # and stuff). + with pytest.raises(QueryRuntimeException): + n1.query("SELECT * FROM dist_over_dist_secure", user="new_user") diff --git a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml index 7a9cda7ccbd..32d5d131a44 100644 --- a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml +++ b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml @@ -1,3 +1,4 @@ 10 + throw diff --git a/tests/integration/test_group_array_element_size/test.py b/tests/integration/test_group_array_element_size/test.py index 86b1d5feeee..90b2712ffbf 100644 --- a/tests/integration/test_group_array_element_size/test.py +++ b/tests/integration/test_group_array_element_size/test.py @@ -9,6 +9,12 @@ node1 = cluster.add_instance( stay_alive=True, ) +node2 = cluster.add_instance( + "node2", + main_configs=["configs/group_array_max_element_size.xml"], + stay_alive=True, +) + @pytest.fixture(scope="module") def started_cluster(): @@ -63,3 +69,33 @@ def test_max_exement_size(started_cluster): node1.restart_clickhouse() assert node1.query("select length(groupArrayMerge(x)) from tab3") == "21\n" + + +def test_limit_size(started_cluster): + node2.query( + "CREATE TABLE tab4 (x AggregateFunction(groupArray, Array(UInt8))) ENGINE = MergeTree ORDER BY tuple()" + ) + node2.query("insert into tab4 select groupArrayState([zero]) from zeros(10)") + assert node2.query("select length(groupArrayMerge(x)) from tab4") == "10\n" + + node2.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "throw", + "discard", + ) + + node2.restart_clickhouse() + + node2.query("insert into tab4 select groupArrayState([zero]) from zeros(100)") + assert node2.query("select length(groupArrayMerge(x)) from tab4") == "10\n" + + node2.replace_in_config( + "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", + "discard", + "throw", + ) + + node2.restart_clickhouse() + + with pytest.raises(Exception, match=r"Too large array size"): + node2.query("insert into tab4 select groupArrayState([zero]) from zeros(11)") diff --git a/tests/integration/test_host_resolver_fail_count/__init__.py b/tests/integration/test_host_resolver_fail_count/__init__.py new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml b/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml new file mode 100644 index 00000000000..bde62b82719 --- /dev/null +++ b/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml @@ -0,0 +1,12 @@ + + + + 5 + 5 + 5 + 5 + 5 + 5 + + + \ No newline at end of file diff --git a/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml b/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml new file mode 100644 index 00000000000..94ac83b32ac --- /dev/null +++ b/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml @@ -0,0 +1,21 @@
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <s3>
+                <type>s3</type>
+                <endpoint>http://minio1:9001/root/data/</endpoint>
+                <access_key_id>minio</access_key_id>
+                <secret_access_key>minio123</secret_access_key>
+            </s3>
+        </disks>
+        <policies>
+            <s3>
+                <volumes>
+                    <main>
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+</clickhouse>
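As a quick sanity check that a storage configuration like the one above was actually loaded, the server's view of it can be inspected through system.storage_policies and system.disks. A minimal sketch, assuming the `node` instance from the test below; the helper name and the use of these system tables for this check are illustrative, not part of this change:

def assert_storage_policy_loaded(node, policy="s3", disk="s3"):
    # Each row of system.storage_policies describes one volume of a policy,
    # including the array of disks that back it.
    rows = node.query(
        f"SELECT count() FROM system.storage_policies "
        f"WHERE policy_name = '{policy}' AND has(disks, '{disk}')"
    ).strip()
    assert rows != "0", f"policy '{policy}' does not reference disk '{disk}'"

    # system.disks confirms the disk itself was instantiated from the XML.
    assert (
        node.query(f"SELECT count() FROM system.disks WHERE name = '{disk}'").strip()
        == "1"
    )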
diff --git a/tests/integration/test_host_resolver_fail_count/test_case.py b/tests/integration/test_host_resolver_fail_count/test_case.py new file mode 100644 index 00000000000..d25681f0781 --- /dev/null +++ b/tests/integration/test_host_resolver_fail_count/test_case.py @@ -0,0 +1,126 @@ +"""Test Interserver responses on configured IP.""" + +import pytest +import time +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config.d/cluster.xml", "configs/config.d/s3.xml"], + with_minio=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +# The same value as in ClickHouse; this can't be configured via config now +DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2 * 60 + + +def test_host_resolver(start_cluster): + minio_ip = cluster.get_instance_ip("minio1") + + # drop DNS cache + node.set_hosts( + [ + (minio_ip, "minio1"), + (node.ip_address, "minio1"), # no answer on 9001 port on this IP + ] + ) + + node.query("SYSTEM DROP DNS CACHE") + node.query("SYSTEM DROP CONNECTIONS CACHE") + + node.query( + """ + CREATE TABLE test (key UInt32, value UInt32) + Engine=MergeTree() + ORDER BY key PARTITION BY key + SETTINGS storage_policy='s3' + """ + ) + + initial_fails = "0\n" + k = 0 + limit = 100 + while initial_fails == "0\n": + node.query( + f""" + INSERT INTO test VALUES (0,{k}) + """ + ) + # HostResolver chooses an IP randomly, so a single call may pick the working IP + initial_fails = node.query( + "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" + ) + k += 1 + if k >= limit: + # The dead IP was not chosen in 100 iterations. + # This is not expected, but it is not an error either. + # Stop the test. + return + + # initial_fails can be more than 1 if ClickHouse does something in several parallel threads + + for j in range(10): + for i in range(10): + node.query( + f""" + INSERT INTO test VALUES ({i+1},{j+1}) + """ + ) + fails = node.query( + "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" + ) + assert fails == initial_fails + + # Check that ClickHouse tries to recheck the IP after 2 minutes + time.sleep(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS) + + intermediate_fails = initial_fails + limit = k + 100 + while intermediate_fails == initial_fails: + node.query( + f""" + INSERT INTO test VALUES (101,{k}) + """ + ) + intermediate_fails = node.query( + "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" + ) + k += 1 + if k >= limit: + # The dead IP was not chosen in 100 iterations. + # This is not expected, but it is not an error either. + # Stop the test.
+ return + + # After another 2 minutes shoudl not be new fails, next retry after 4 minutes + time.sleep(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS) + + initial_fails = intermediate_fails + limit = k + 100 + while intermediate_fails == initial_fails: + node.query( + f""" + INSERT INTO test VALUES (102,{k}) + """ + ) + intermediate_fails = node.query( + "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" + ) + k += 1 + if k >= limit: + break + + assert k == limit diff --git a/tests/integration/test_hot_reload_storage_policy/configs/storage_configuration.xml b/tests/integration/test_hot_reload_storage_policy/configs/config.d/storage_configuration.xml similarity index 56% rename from tests/integration/test_hot_reload_storage_policy/configs/storage_configuration.xml rename to tests/integration/test_hot_reload_storage_policy/configs/config.d/storage_configuration.xml index 466ecde137d..8940efb3301 100644 --- a/tests/integration/test_hot_reload_storage_policy/configs/storage_configuration.xml +++ b/tests/integration/test_hot_reload_storage_policy/configs/config.d/storage_configuration.xml @@ -4,18 +4,25 @@ /var/lib/clickhouse/disk0/ - - /var/lib/clickhouse/disk1/ - - + disk0 - + + + + + + localhost + 9000 + + + + \ No newline at end of file diff --git a/tests/integration/test_hot_reload_storage_policy/test.py b/tests/integration/test_hot_reload_storage_policy/test.py index 8654b0462e4..1d38f39d72c 100644 --- a/tests/integration/test_hot_reload_storage_policy/test.py +++ b/tests/integration/test_hot_reload_storage_policy/test.py @@ -10,11 +10,8 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) -node0 = cluster.add_instance( - "node0", with_zookeeper=True, main_configs=["configs/storage_configuration.xml"] -) -node1 = cluster.add_instance( - "node1", with_zookeeper=True, main_configs=["configs/storage_configuration.xml"] +node = cluster.add_instance( + "node", main_configs=["configs/config.d/storage_configuration.xml"], stay_alive=True ) @@ -28,6 +25,37 @@ def started_cluster(): cluster.shutdown() +old_disk_config = """ + + + + + /var/lib/clickhouse/disk0/ + + + + + + + disk0 + + + + + + + + + + localhost + 9000 + + + + + +""" + new_disk_config = """ @@ -38,49 +66,120 @@ new_disk_config = """ /var/lib/clickhouse/disk1/ - - /var/lib/clickhouse/disk2/ - - - disk2 + disk1 + + disk0 - + + + + + + localhost + 9000 + + + + """ def set_config(node, config): - node.replace_config( - "/etc/clickhouse-server/config.d/storage_configuration.xml", config - ) + node.replace_config("/etc/clickhouse-server/config.d/config.xml", config) node.query("SYSTEM RELOAD CONFIG") + # to give ClickHouse time to refresh disks + time.sleep(1) def test_hot_reload_policy(started_cluster): - node0.query( - "CREATE TABLE t (d Int32, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/t', '0') PARTITION BY d ORDER BY tuple() SETTINGS storage_policy = 'default_policy'" + node.query( + "CREATE TABLE t (d Int32, s String) ENGINE = MergeTree() PARTITION BY d ORDER BY tuple() SETTINGS storage_policy = 'default_policy'" ) - node0.query("INSERT INTO TABLE t VALUES (1, 'foo') (1, 'bar')") + node.query("SYSTEM STOP MERGES t") + node.query("INSERT INTO TABLE t VALUES (1, 'foo')") - node1.query( - "CREATE TABLE t (d Int32, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/t_mirror', '1') PARTITION BY d ORDER BY tuple() SETTINGS storage_policy = 'default_policy'" + set_config(node, new_disk_config) + + # After reloading new policy with 
new disk, merge tree tables should reinitialize the new disk (create relative path, 'detached' folder...) + # and as default policy is `least_used`, at least one insertion should come to the new disk + node.query("INSERT INTO TABLE t VALUES (1, 'foo')") + node.query("INSERT INTO TABLE t VALUES (1, 'bar')") + + num_disks = int( + node.query( + "SELECT uniqExact(disk_name) FROM system.parts WHERE database = 'default' AND table = 't'" + ) ) - set_config(node1, new_disk_config) - time.sleep(1) - node1.query("ALTER TABLE t FETCH PARTITION 1 FROM '/clickhouse/tables/t'") - result = int(node1.query("SELECT count() FROM t")) + assert ( - result == 4, - "Node should have 2 x full data (4 rows) after reloading storage configuration and fetch new partition, but get {} rows".format( - result - ), + num_disks == 2 + ), "Node should write data to 2 disks after reloading disks, but got {}".format( + num_disks ) + + # If `detached` is not created this query will throw exception + node.query("ALTER TABLE t DETACH PARTITION 1") + + node.query("DROP TABLE t") + + +def test_hot_reload_policy_distributed_table(started_cluster): + # Same test for distributed table, it should reinitialize the storage policy and data volume + # We check it by trying an insert and the distribution queue must be on new disk + + # Restart node first + set_config(node, old_disk_config) + node.restart_clickhouse() + + node.query( + "CREATE TABLE t (d Int32, s String) ENGINE = MergeTree PARTITION BY d ORDER BY tuple()" + ) + node.query( + "CREATE TABLE t_d (d Int32, s String) ENGINE = Distributed('default', 'default', 't', d%20, 'default_policy')" + ) + + node.query("SYSTEM STOP DISTRIBUTED SENDS t_d") + node.query( + "INSERT INTO TABLE t_d SETTINGS prefer_localhost_replica = 0 VALUES (2, 'bar') (12, 'bar')" + ) + # t_d should create queue on disk0 + queue_path = node.query("SELECT data_path FROM system.distribution_queue") + + assert ( + "disk0" in queue_path + ), "Distributed table should create distributed queue on disk0 (disk1), but the queue path is {}".format( + queue_path + ) + + node.query("SYSTEM START DISTRIBUTED SENDS t_d") + + node.query("SYSTEM FLUSH DISTRIBUTED t_d") + + set_config(node, new_disk_config) + + node.query("SYSTEM STOP DISTRIBUTED SENDS t_d") + node.query( + "INSERT INTO TABLE t_d SETTINGS prefer_localhost_replica = 0 VALUES (2, 'bar') (12, 'bar')" + ) + + # t_d should create queue on disk1 + queue_path = node.query("SELECT data_path FROM system.distribution_queue") + + assert ( + "disk1" in queue_path + ), "Distributed table should be using new disk (disk1), but the queue paths are {}".format( + queue_path + ) + + node.query("DROP TABLE t") + node.query("DROP TABLE t_d") diff --git a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/proxy-resolver/resolver.py b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/proxy-resolver/resolver.py index 8c7611303b8..eaea4c1dab2 100644 --- a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/proxy-resolver/resolver.py +++ b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/proxy-resolver/resolver.py @@ -5,7 +5,10 @@ import bottle @bottle.route("/hostname") def index(): - return "proxy1" + if random.randrange(2) == 0: + return "proxy1" + else: + return "proxy2" bottle.run(host="0.0.0.0", port=8080) diff --git a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py 
b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py index ae872a33cd4..3c8a5de8691 100644 --- a/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py +++ b/tests/integration/test_https_s3_table_function_with_http_proxy_no_tunneling/test.py @@ -56,7 +56,7 @@ def test_s3_with_https_proxy_list(cluster): def test_s3_with_https_remote_proxy(cluster): - proxy_util.simple_test(cluster, ["proxy1"], "https", "remote_proxy_node") + proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "https", "remote_proxy_node") def test_s3_with_https_env_proxy(cluster): diff --git a/tests/integration/test_intersecting_parts/__init__.py b/tests/integration/test_intersecting_parts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_intersecting_parts/test.py b/tests/integration/test_intersecting_parts/test.py new file mode 100644 index 00000000000..3a9732f22de --- /dev/null +++ b/tests/integration/test_intersecting_parts/test.py @@ -0,0 +1,71 @@ +import pytest +import logging + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +# This test constructs intersecting parts intentionally. It's not an elegant test. +# TODO(hanfei): write a test which selects part 1_1 merging with part 2_2 and a drop range. +def test_intersect_parts_when_restart(started_cluster): + node.query( + """ + CREATE TABLE data ( + key Int + ) + ENGINE = ReplicatedMergeTree('/ch/tables/default/data', 'node') + ORDER BY key; + """ + ) + node.query("system stop cleanup data") + node.query("INSERT INTO data values (1)") + node.query("INSERT INTO data values (2)") + node.query("INSERT INTO data values (3)") + node.query("INSERT INTO data values (4)") + node.query("ALTER TABLE data DROP PART 'all_1_1_0'") + node.query("ALTER TABLE data DROP PART 'all_2_2_0'") + node.query("OPTIMIZE TABLE data FINAL") + + part_path = node.query( + "SELECT path FROM system.parts WHERE table = 'data' and name = 'all_0_3_1'" + ).strip() + + assert len(part_path) != 0 + + node.query("detach table data") + new_path = part_path[:-6] + "1_2_3" + node.exec_in_container( + [ + "bash", + "-c", + "cp -r {p} {p1}".format(p=part_path, p1=new_path), + ], + privileged=True, + ) + + # mock empty part + node.exec_in_container( + [ + "bash", + "-c", + "echo -n 0 > {p1}/count.txt".format(p1=new_path), + ], + privileged=True, + ) + + node.query("attach table data") + data_size = node.query("SELECT sum(key) FROM data").strip() + assert data_size == "5" diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 9d7a46001e7..ca22c119281 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -47,24 +47,24 @@ def test_big_family(client: KeeperClient): assert response == TSV( [ - ["/test_big_family/1", "5"], - ["/test_big_family/2", "3"], - ["/test_big_family/2/3", "0"], - ["/test_big_family/2/2", "0"], - ["/test_big_family/2/1", "0"], - ["/test_big_family/1/5", "0"], - ["/test_big_family/1/4", "0"], - ["/test_big_family/1/3", "0"], - ["/test_big_family/1/2", "0"], - ["/test_big_family/1/1", "0"], + ["/test_big_family", "11"], + ["/test_big_family/1", "6"], + ["/test_big_family/2", "4"], + ["/test_big_family/2/3", "1"], + ["/test_big_family/2/2", "1"],
+ ["/test_big_family/2/1", "1"], + ["/test_big_family/1/5", "1"], + ["/test_big_family/1/4", "1"], + ["/test_big_family/1/3", "1"], + ["/test_big_family/1/2", "1"], ] ) - response = client.find_big_family("/test_big_family", 1) - + response = client.find_big_family("/test_big_family", 2) assert response == TSV( [ - ["/test_big_family/1", "5"], + ["/test_big_family", "11"], + ["/test_big_family/1", "6"], ] ) @@ -86,7 +86,12 @@ def test_find_super_nodes(client: KeeperClient): client.cd("/test_find_super_nodes") response = client.find_super_nodes(4) - assert response == TSV( + + # The order of the response is not guaranteed, so we need to sort it + normalized_response = response.strip().split("\n") + normalized_response.sort() + + assert TSV(normalized_response) == TSV( [ ["/test_find_super_nodes/1", "5"], ["/test_find_super_nodes/2", "4"], diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index f6f746c892e..6dfb2078559 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -17,7 +17,6 @@ node = cluster.add_instance( "node", main_configs=["configs/enable_keeper.xml"], stay_alive=True, - with_zookeeper=True, ) @@ -211,3 +210,46 @@ def test_invalid_snapshot(started_cluster): node_zk.close() except: pass + + +def test_snapshot_size(started_cluster): + keeper_utils.wait_until_connected(started_cluster, node) + node_zk = None + try: + node_zk = get_connection_zk("node") + + node_zk.create("/test_state_size", b"somevalue") + strs = [] + for i in range(100): + strs.append(random_string(123).encode()) + node_zk.create("/test_state_size/node" + str(i), strs[i]) + + node_zk.stop() + node_zk.close() + + keeper_utils.send_4lw_cmd(started_cluster, node, "csnp") + node.wait_for_log_line("Created persistent snapshot") + + def get_snapshot_size(): + return int( + next( + filter( + lambda line: "zk_latest_snapshot_size" in line, + keeper_utils.send_4lw_cmd(started_cluster, node, "mntr").split( + "\n" + ), + ) + ).split("\t")[1] + ) + + assert get_snapshot_size() != 0 + restart_clickhouse() + assert get_snapshot_size() != 0 + finally: + try: + if node_zk is not None: + node_zk.stop() + node_zk.close() + + except: + pass diff --git a/tests/integration/test_lazy_database/__init__.py b/tests/integration/test_lazy_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_log_family_s3/configs/minio.xml b/tests/integration/test_lazy_database/configs/storage_policy.xml similarity index 100% rename from tests/integration/test_log_family_s3/configs/minio.xml rename to tests/integration/test_lazy_database/configs/storage_policy.xml diff --git a/tests/integration/test_lazy_database/test.py b/tests/integration/test_lazy_database/test.py new file mode 100644 index 00000000000..6890aa87374 --- /dev/null +++ b/tests/integration/test_lazy_database/test.py @@ -0,0 +1,88 @@ +import logging +import time +import pytest +import os +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["configs/storage_policy.xml"], + with_minio=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def assert_objects_count(cluster, objects_count, path="data/"): + minio = cluster.minio_client + s3_objects = 
list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) + if objects_count != len(s3_objects): + for s3_object in s3_objects: + object_meta = minio.stat_object(cluster.minio_bucket, s3_object.object_name) + logging.info("Existing S3 object: %s", str(object_meta)) + assert objects_count == len(s3_objects) + + +def list_of_files_on_ch_disk(node, disk, path): + disk_path = node.query( + f"SELECT path FROM system.disks WHERE name='{disk}'" + ).splitlines()[0] + return node.exec_in_container( + ["bash", "-c", f"ls {os.path.join(disk_path, path)}"], user="root" + ) + + +@pytest.mark.parametrize( + "engine", + [ + pytest.param("Log"), + ], +) +@pytest.mark.parametrize( + "disk,check_s3", + [ + pytest.param("default", False), + pytest.param("s3", True), + ], +) +@pytest.mark.parametrize( + "delay", + [ + pytest.param(0), + pytest.param(4), + ], +) +def test_drop_table(cluster, engine, disk, check_s3, delay): + node = cluster.instances["node"] + + node.query("DROP DATABASE IF EXISTS lazy") + node.query("CREATE DATABASE lazy ENGINE=Lazy(2)") + node.query( + "CREATE TABLE lazy.table (id UInt64) ENGINE={} SETTINGS disk = '{}'".format( + engine, + disk, + ) + ) + + node.query("INSERT INTO lazy.table SELECT number FROM numbers(10)") + assert node.query("SELECT count(*) FROM lazy.table") == "10\n" + if delay: + time.sleep(delay) + node.query("DROP TABLE lazy.table SYNC") + + if check_s3: + # There mustn't be any orphaned data + assert_objects_count(cluster, 0) + + # Local data must be removed + assert list_of_files_on_ch_disk(node, disk, "data/lazy/") == "" diff --git a/tests/integration/test_log_family_s3/configs/storage_configuration.xml b/tests/integration/test_log_family_s3/configs/storage_configuration.xml new file mode 100644 index 00000000000..d479a59b197 --- /dev/null +++ b/tests/integration/test_log_family_s3/configs/storage_configuration.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + true + + 1 + 0 + 1 + 20000 + + + + + +
+ s3_no_retries +
+
+
+
+
+
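The StripeLog test below toggles a failpoint with SYSTEM ENABLE/DISABLE FAILPOINT around a failing insert. If that pattern gets reused, wrapping it in a context manager guarantees the failpoint is disabled even when an assertion fires mid-test. A small sketch, assuming a ClickHouseInstance-like `node`; the helper itself is not part of this patch:

from contextlib import contextmanager


@contextmanager
def failpoint(node, name):
    # Enable the named failpoint for the duration of the block and always
    # disable it afterwards so later tests on the same node are unaffected.
    node.query(f"SYSTEM ENABLE FAILPOINT {name}")
    try:
        yield
    finally:
        node.query(f"SYSTEM DISABLE FAILPOINT {name}")


# Possible usage, mirroring the test below:
# with failpoint(node, "stripe_log_sink_write_fallpoint"):
#     node.query("INSERT INTO stripe_table SELECT number FROM numbers(10)", ignore_error=True)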
diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index bed379d098b..ed84bdf48e6 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -11,7 +11,7 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/minio.xml", "configs/ssl.xml"], + main_configs=["configs/storage_configuration.xml", "configs/ssl.xml"], with_minio=True, ) logging.info("Starting cluster...") @@ -84,3 +84,39 @@ def test_log_family_s3(cluster, log_engine, files_overhead, files_overhead_per_i assert_objects_count(cluster, 0) finally: node.query("DROP TABLE s3_test") + + +# Imitate the case when an error occurs while inserting into the table, +# for example S3::TooManyRequests. +# In that case we may have updated the data file, but not the size file. +# So, due to the exception, we should truncate the data file to undo the insert query. +# See FileChecker::repair(). +def test_stripe_log_truncate(cluster): + node = cluster.instances["node"] + + node.query( + """ + CREATE TABLE stripe_table ( + a int + ) ENGINE = StripeLog() + SETTINGS storage_policy='s3_no_retries' + """ + ) + + node.query("SYSTEM ENABLE FAILPOINT stripe_log_sink_write_fallpoint") + node.query( + """ + INSERT INTO stripe_table SELECT number FROM numbers(10) + """, + ignore_error=True, + ) + node.query("SYSTEM DISABLE FAILPOINT stripe_log_sink_write_fallpoint") + assert node.query("SELECT count(*) FROM stripe_table") == "0\n" + node.query("INSERT INTO stripe_table SELECT number FROM numbers(10)") + assert node.query("SELECT count(*) FROM stripe_table") == "10\n" + + # Make sure that everything is okay with the table after restart. + node.query("DETACH TABLE stripe_table") + node.query("ATTACH TABLE stripe_table") + + assert node.query("DROP TABLE stripe_table") == "" diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 382539df7de..b8e67551d79 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -90,7 +90,7 @@ def test_lost_part_same_replica(start_cluster): ) assert node1.contains_in_log( - "Created empty part" + f"Created empty part {victim_part_from_the_middle}" ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed" assert node1.query("SELECT COUNT() FROM mt0") == "4\n" @@ -143,7 +143,10 @@ def test_lost_part_other_replica(start_cluster): node1.query("CHECK TABLE mt1") node2.query("SYSTEM START REPLICATION QUEUES") - res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1") + # Reduce timeout in sync replica since it might never finish with merge stopped and we don't want to wait 300s + res, err = node1.query_and_get_answer_with_error( + "SYSTEM SYNC REPLICA mt1", settings={"receive_timeout": 30} + ) print("result: ", res) print("error: ", res) @@ -158,10 +161,10 @@ def test_lost_part_other_replica(start_cluster): ) assert node1.contains_in_log( - "Created empty part" - ), "Seems like empty part {} is not created or log message changed".format( - victim_part_from_the_middle - ) + f"Created empty part {victim_part_from_the_middle}" + ) or node1.contains_in_log( + f"Part {victim_part_from_the_middle} looks broken. Removing it and will try to fetch."
+ ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed" assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4") assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 57f2ccd720d..86000799ae4 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -3413,3 +3413,42 @@ def gtid_after_attach_test(clickhouse_node, mysql_node, replication): interval_seconds=1, retry_count=300, ) + + +def mysql_create_database_without_connection(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS create_without_connection") + clickhouse_node.query("DROP DATABASE IF EXISTS create_without_connection") + mysql_node.query("CREATE DATABASE create_without_connection") + mysql_node.query( + "CREATE TABLE create_without_connection.test ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.cluster.pause_container(service_name) + + assert "ConnectionFailed:" in clickhouse_node.query_and_get_error( + """ + CREATE DATABASE create_without_connection + ENGINE = MaterializedMySQL('{}:3306', 'create_without_connection', 'root', 'clickhouse') + """.format( + service_name + ) + ) + + clickhouse_node.query( + """ + CREATE DATABASE create_without_connection + ENGINE = MaterializedMySQL('{}:3306', 'create_without_connection', 'root', 'clickhouse') + SETTINGS allow_startup_database_without_connection_to_mysql=1 + """.format( + service_name + ) + ) + + clickhouse_node.cluster.unpause_container(service_name) + mysql_node.alloc_connection() + + check_query( + clickhouse_node, + "SHOW TABLES FROM create_without_connection FORMAT TSV", + "test\n", + ) diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 57e496fe737..080a850a8c6 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -721,3 +721,11 @@ def test_binlog_client(started_cluster, started_mysql_8_0, replication): materialized_with_ddl.gtid_after_attach_test( node_db, started_mysql_8_0, replication ) + + +def test_create_database_without_mysql_connection( + started_cluster, started_mysql_8_0, clickhouse_node: ClickHouseInstance +): + materialized_with_ddl.mysql_create_database_without_connection( + clickhouse_node, started_mysql_8_0, "mysql80" + ) diff --git a/tests/integration/test_max_suspicious_broken_parts_replicated/test.py b/tests/integration/test_max_suspicious_broken_parts_replicated/test.py index 0d009e6b132..683715da404 100644 --- a/tests/integration/test_max_suspicious_broken_parts_replicated/test.py +++ b/tests/integration/test_max_suspicious_broken_parts_replicated/test.py @@ -223,4 +223,4 @@ def test_corrupted_unexpected_part_ultimate(): == "1\n" ) - assert node.query("SELECT sum(key) FROM broken_table_3") == "190\n" + assert node.query("SELECT sum(key) FROM broken_table_3") == "145\n" diff --git a/tests/integration/test_merge_tree_load_marks/__init__.py b/tests/integration/test_merge_tree_load_marks/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_merge_tree_load_marks/configs/config.xml 
b/tests/integration/test_merge_tree_load_marks/configs/config.xml new file mode 100644 index 00000000000..1c9ee8d698f --- /dev/null +++ b/tests/integration/test_merge_tree_load_marks/configs/config.xml @@ -0,0 +1,12 @@ + + + system +
text_log
+ 7500 + 1048576 + 8192 + 524288 + false + test + + diff --git a/tests/integration/test_merge_tree_load_marks/test.py b/tests/integration/test_merge_tree_load_marks/test.py new file mode 100644 index 00000000000..a7078017ac9 --- /dev/null +++ b/tests/integration/test_merge_tree_load_marks/test.py @@ -0,0 +1,62 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +# This test is bad and it should be a functional test but S3 metrics +# are accounted incorrectly for merges in part_log and query_log. +# Also we have text_log with level 'trace' in functional tests +# but this test requeires text_log with level 'test'. + + +@pytest.mark.parametrize("min_bytes_for_wide_part", [0, 1000000000]) +def test_merge_load_marks(started_cluster, min_bytes_for_wide_part): + node.query( + f""" + DROP TABLE IF EXISTS t_load_marks; + + CREATE TABLE t_load_marks (a UInt64, b UInt64) + ENGINE = MergeTree ORDER BY a + SETTINGS min_bytes_for_wide_part = {min_bytes_for_wide_part}; + + INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); + INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); + + OPTIMIZE TABLE t_load_marks FINAL; + SYSTEM FLUSH LOGS; + """ + ) + + uuid = node.query( + "SELECT uuid FROM system.tables WHERE table = 't_load_marks'" + ).strip() + + result = node.query( + f""" + SELECT count() + FROM system.text_log + WHERE (query_id LIKE '%{uuid}::all_1_2_1%') AND (message LIKE '%Loading marks%') + """ + ).strip() + + result = int(result) + + is_wide = min_bytes_for_wide_part == 0 + not_loaded = result == 0 + + assert is_wide == not_loaded diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 9216b08f942..0bf81e81383 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -857,9 +857,9 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli error = node.query_and_get_error( "OPTIMIZE TABLE test_merge_canceled_by_s3_errors FINAL", ) - assert "ExpectedError Message: mock s3 injected error" in error, error + assert "ExpectedError Message: mock s3 injected unretryable error" in error, error - node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") table_uuid = node.query( "SELECT uuid FROM system.tables WHERE database = 'default' AND name = 'test_merge_canceled_by_s3_errors' LIMIT 1" @@ -867,7 +867,7 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli node.query("SYSTEM FLUSH LOGS") error_count_in_blob_log = node.query( - f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected error%'" + f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected unretryable error%'" ).strip() assert int(error_count_in_blob_log) > 0, node.query( f"SELECT * FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' FORMAT PrettyCompactMonoBlock" @@ -911,7 +911,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): node.query("OPTIMIZE TABLE merge_canceled_by_s3_errors_when_move FINAL") - 
node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") count = node.query("SELECT count() FROM merge_canceled_by_s3_errors_when_move") assert int(count) == 2000, count diff --git a/tests/integration/test_move_ttl_broken_compatibility/__init__.py b/tests/integration/test_move_ttl_broken_compatibility/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_move_ttl_broken_compatibility/configs/storage_conf.xml b/tests/integration/test_move_ttl_broken_compatibility/configs/storage_conf.xml new file mode 100644 index 00000000000..1b2177d0392 --- /dev/null +++ b/tests/integration/test_move_ttl_broken_compatibility/configs/storage_conf.xml @@ -0,0 +1,36 @@ + + + test + + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + + + + default + + + + + + default + False + +
+ s3 + False +
+
+ 0.0 +
+
+
+
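The compatibility test that follows polls system.parts in a loop until the TTL move to the s3 disk shows up. Factoring that wait into a helper makes the intent explicit and gives a clear failure message on timeout. A minimal sketch, assuming a `node` instance; the helper name and the one-second poll interval are illustrative:

import time


def wait_for_part_on_disk(node, table, disk, timeout_sec=60):
    # Poll system.parts until at least one active part of `table` lives on `disk`.
    disks = []
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        disks = (
            node.query(
                f"SELECT DISTINCT disk_name FROM system.parts "
                f"WHERE table = '{table}' AND active"
            )
            .strip()
            .split("\n")
        )
        if disk in disks:
            return disks
        time.sleep(1)
    raise AssertionError(f"no part of '{table}' moved to disk '{disk}', last seen: {disks}")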
diff --git a/tests/integration/test_move_ttl_broken_compatibility/test.py b/tests/integration/test_move_ttl_broken_compatibility/test.py new file mode 100644 index 00000000000..f9eab8b5ebb --- /dev/null +++ b/tests/integration/test_move_ttl_broken_compatibility/test.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +import logging +import random +import string +import time + +import pytest +from helpers.cluster import ClickHouseCluster +import minio + + +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.add_instance( + "node1", + main_configs=["configs/storage_conf.xml"], + image="clickhouse/clickhouse-server", + with_minio=True, + tag="24.1", + stay_alive=True, + with_installed_binary=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_bc_compatibility(started_cluster): + node1 = cluster.instances["node1"] + node1.query( + """ + CREATE TABLE test_ttl_table ( + generation UInt64, + date_key DateTime, + number UInt64, + text String, + expired DateTime DEFAULT now() + ) + ENGINE=MergeTree + ORDER BY (generation, date_key) + PARTITION BY toMonth(date_key) + TTL expired + INTERVAL 20 SECONDS TO DISK 's3' + SETTINGS storage_policy = 's3'; + """ + ) + + node1.query( + """ + INSERT INTO test_ttl_table ( + generation, + date_key, + number, + text + ) + SELECT + 1, + toDateTime('2000-01-01 00:00:00') + rand(number) % 365 * 86400, + number, + toString(number) + FROM numbers(10000); + """ + ) + + disks = ( + node1.query( + """ + SELECT distinct disk_name + FROM system.parts + WHERE table = 'test_ttl_table' + """ + ) + .strip() + .split("\n") + ) + print("Disks before", disks) + + assert len(disks) == 1 + assert disks[0] == "default" + + node1.restart_with_latest_version() + + for _ in range(60): + disks = ( + node1.query( + """ + SELECT distinct disk_name + FROM system.parts + WHERE table = 'test_ttl_table' + """ + ) + .strip() + .split("\n") + ) + print("Disks after", disks) + if "s3" in disks: + break + time.sleep(1) + assert "s3" in disks diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index fdd81284b2a..e97ffeb4cc3 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -1783,15 +1783,12 @@ def test_move_across_policies_does_not_work(start_cluster): except QueryRuntimeException: """All parts of partition 'all' are already on disk 'jbod2'.""" - with pytest.raises( - QueryRuntimeException, - match=".*because disk does not belong to storage policy.*", - ): - node1.query( - """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( - name=name - ) + # works when attach + node1.query( + """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( + name=name ) + ) with pytest.raises( QueryRuntimeException, @@ -1814,7 +1811,7 @@ def test_move_across_policies_does_not_work(start_cluster): ) assert node1.query( - """SELECT * FROM {name}""".format(name=name) + """SELECT * FROM {name}2""".format(name=name) ).splitlines() == ["1"] finally: diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index 18e2eb43813..187cae5c957 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -2,7 +2,7 @@ import random import string import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, 
is_arm cluster = ClickHouseCluster(__file__) @@ -255,6 +255,11 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): def test_preconfigured_deflateqpl_codec(start_cluster): + if is_arm(): + pytest.skip( + "Skipping test because it's special test for Intel code (doesn't work on ARM)" + ) + node6.query( """ CREATE TABLE compression_codec_multiple_with_key ( diff --git a/tests/integration/test_parallel_replicas_failover/__init__.py b/tests/integration/test_parallel_replicas_failover/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_failover/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_failover/configs/remote_servers.xml new file mode 100644 index 00000000000..ac46202cc95 --- /dev/null +++ b/tests/integration/test_parallel_replicas_failover/configs/remote_servers.xml @@ -0,0 +1,22 @@ + + + + + true + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + + diff --git a/tests/integration/test_parallel_replicas_failover/test.py b/tests/integration/test_parallel_replicas_failover/test.py new file mode 100644 index 00000000000..bf25136bff7 --- /dev/null +++ b/tests/integration/test_parallel_replicas_failover/test.py @@ -0,0 +1,114 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node2 = cluster.add_instance( + "node2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node3 = cluster.add_instance( + "node3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(cluster, table_name, skip_last_replica): + node1.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + node2.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + node3.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + + node1.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + ) + node2.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)" + ) + if not skip_last_replica: + node3.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + ) + + # populate data + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(2000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(3000, 1000)" + ) + node2.query(f"SYSTEM SYNC REPLICA {table_name}") + if not skip_last_replica: + node3.query(f"SYSTEM SYNC REPLICA {table_name}") + + +def test_skip_replicas_without_table(start_cluster): + cluster_name = "test_1_shard_3_replicas" + table_name = "tt" + create_tables(cluster_name, table_name, skip_last_replica=True) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t1000\n" + + log_comment = "5230b069-9574-407d-9b80-891b5a175f41" + assert ( + node1.query( + f"SELECT key, 
count() FROM {table_name} GROUP BY key ORDER BY key", + settings={ + "allow_experimental_parallel_reading_from_replicas": 2, + "max_parallel_replicas": 3, + "cluster_for_parallel_replicas": cluster_name, + "log_comment": log_comment, + }, + ) + == expected_result + ) + + node1.query("SYSTEM FLUSH LOGS") + assert ( + node1.query( + f"SELECT ProfileEvents['DistributedConnectionMissingTable'], ProfileEvents['ParallelReplicasUnavailableCount'] FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '{log_comment}' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0" + ) + == "1\t1\n" + ) + + +def test_skip_unresponsive_replicas(start_cluster): + cluster_name = "test_1_shard_3_replicas" + table_name = "tt" + create_tables(cluster_name, table_name, skip_last_replica=False) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t1000\n" + + node1.query("SYSTEM ENABLE FAILPOINT receive_timeout_on_table_status_response") + + assert ( + node1.query( + f"SELECT key, count() FROM {table_name} GROUP BY key ORDER BY key", + settings={ + "allow_experimental_parallel_reading_from_replicas": 2, + "max_parallel_replicas": 3, + "cluster_for_parallel_replicas": cluster_name, + }, + ) + == expected_result + ) diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml index 7d779cb0d2e..5150e9096de 100644 --- a/tests/integration/test_replicated_database/configs/config.xml +++ b/tests/integration/test_replicated_database/configs/config.xml @@ -5,4 +5,5 @@ 10 50 + 42 diff --git a/tests/integration/test_replicated_database/configs/inconsistent_settings.xml b/tests/integration/test_replicated_database/configs/inconsistent_settings.xml new file mode 100644 index 00000000000..13dc1eae976 --- /dev/null +++ b/tests/integration/test_replicated_database/configs/inconsistent_settings.xml @@ -0,0 +1,18 @@ + + + + 1 + 1 + 0 + 0 + + 0 + 1 + + + + + default + + + diff --git a/tests/integration/test_replicated_database/configs/settings2.xml b/tests/integration/test_replicated_database/configs/settings2.xml new file mode 100644 index 00000000000..dad5740a8ae --- /dev/null +++ b/tests/integration/test_replicated_database/configs/settings2.xml @@ -0,0 +1,17 @@ + + + + 1 + 1 + 0 + 0 + + 0 + + + + + default + + + diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 706620cfaef..fd1bfc75227 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -24,7 +24,7 @@ main_node = cluster.add_instance( dummy_node = cluster.add_instance( "dummy_node", main_configs=["configs/config.xml"], - user_configs=["configs/settings.xml"], + user_configs=["configs/settings2.xml"], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 2}, @@ -59,6 +59,14 @@ all_nodes = [ snapshot_recovering_node, ] +bad_settings_node = cluster.add_instance( + "bad_settings_node", + main_configs=["configs/config.xml"], + user_configs=["configs/inconsistent_settings.xml"], + with_zookeeper=True, + macros={"shard": 1, "replica": 4}, +) + uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") @@ -79,6 +87,29 @@ def started_cluster(): cluster.shutdown() +def test_flatten_nested(started_cluster): + main_node.query( + "CREATE 
DATABASE create_replicated_table ENGINE = Replicated('/test/create_replicated_table', 'shard1', 'replica' || '1');" + ) + dummy_node.query( + "CREATE DATABASE create_replicated_table ENGINE = Replicated('/test/create_replicated_table', 'shard1', 'replica2');" + ) + + main_node.query( + "CREATE TABLE create_replicated_table.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);" + ) + + main_node.query( + "CREATE MATERIALIZED VIEW create_replicated_table.mv ENGINE=ReplicatedMergeTree ORDER BY tuple() AS select d, cast([(k, toString(i32))] as Nested(a UInt64, b String)) from create_replicated_table.replicated_table" + ) + + assert main_node.query( + "show create create_replicated_table.mv" + ) == dummy_node.query("show create create_replicated_table.mv") + main_node.query("DROP DATABASE create_replicated_table SYNC") + dummy_node.query("DROP DATABASE create_replicated_table SYNC") + + def test_create_replicated_table(started_cluster): main_node.query( "CREATE DATABASE create_replicated_table ENGINE = Replicated('/test/create_replicated_table', 'shard1', 'replica' || '1');" @@ -1443,3 +1474,51 @@ def test_table_metadata_corruption(started_cluster): main_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") dummy_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") + + +def test_auto_recovery(started_cluster): + dummy_node.query("DROP DATABASE IF EXISTS auto_recovery") + bad_settings_node.query("DROP DATABASE IF EXISTS auto_recovery") + + dummy_node.query( + "CREATE DATABASE auto_recovery ENGINE = Replicated('/clickhouse/databases/auto_recovery', 'shard1', 'replica1');" + ) + bad_settings_node.query( + "CREATE DATABASE auto_recovery ENGINE = Replicated('/clickhouse/databases/auto_recovery', 'shard1', 'replica2') SETTINGS max_retries_before_automatic_recovery=3;" + ) + + dummy_node.query( + "CREATE TABLE auto_recovery.t1 (n int) ENGINE=ReplicatedMergeTree ORDER BY n" + ) + dummy_node.query("INSERT INTO auto_recovery.t1 SELECT 42") + # dummy_node has 0 (default is 1), + # so it will consider that the setting is changed, and will write it to the DDL entry + # bad_settings_node has implicit_transaction=1, so it will fail and recover from snapshot + dummy_node.query( + "CREATE TABLE auto_recovery.t2 (n int) ENGINE=ReplicatedMergeTree ORDER BY tuple()", + settings={ + "throw_on_unsupported_query_inside_transaction": 1, + "distributed_ddl_task_timeout": 0, + }, + ) + dummy_node.query("INSERT INTO auto_recovery.t2 SELECT 137") + dummy_node.query( + "EXCHANGE TABLES auto_recovery.t1 AND auto_recovery.t2", + settings={"distributed_ddl_task_timeout": 0}, + ) + + bad_settings_node.query( + "SYSTEM SYNC DATABASE REPLICA auto_recovery", settings={"receive_timeout": 60} + ) + assert bad_settings_node.contains_in_log( + "Unexpected error (3 times in a row), will try to restart main thread" + ) + assert bad_settings_node.contains_in_log("Cannot begin an implicit transaction") + bad_settings_node.query("SYSTEM SYNC REPLICA auto_recovery.t1") + bad_settings_node.query("SYSTEM SYNC REPLICA auto_recovery.t2") + + assert "42\n" == dummy_node.query("SELECT * FROM auto_recovery.t2") + assert "137\n" == dummy_node.query("SELECT * FROM auto_recovery.t1") + + assert "42\n" == bad_settings_node.query("SELECT * FROM auto_recovery.t2") + assert "137\n" == bad_settings_node.query("SELECT * FROM auto_recovery.t1") diff --git a/tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml 
b/tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml index 1e4641fc8b2..23368394494 100644 --- a/tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml +++ b/tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml @@ -4,9 +4,17 @@ s3_plain_rewritable http://minio1:9001/root/data/ + minio minio123 + + cache + disk_s3_plain_rewritable + /var/lib/clickhouse/disks/s3_plain_rewritable_cache/ + 1000000000 + 1 +
@@ -16,6 +24,13 @@
+        <cache_s3_plain_rewritable>
+            <volumes>
+                <main>
+                    <disk>disk_cache_s3_plain_rewritable</disk>
+                </main>
+            </volumes>
+        </cache_s3_plain_rewritable>
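Note: the hunks above add a filesystem-cache disk layered over the plain-rewritable S3 disk and a cache_s3_plain_rewritable storage policy that uses it. As a hedged sketch of how a table opts into that policy (the table name cache_policy_example is hypothetical; the column layout mirrors the test table created further below), a query along these lines would place its parts on the cached disk:

    CREATE TABLE cache_policy_example
    (
        id Int64,
        data String
    )
    ENGINE = MergeTree()
    ORDER BY id
    -- storage_policy selects the policy defined in storage_conf.xml above
    SETTINGS storage_policy = 'cache_s3_plain_rewritable';

This mirrors how tests/integration/test_s3_plain_rewritable/test.py below exercises the configuration: the test is parametrized over both 's3_plain_rewritable' and 'cache_s3_plain_rewritable' and substitutes the chosen policy into its CREATE TABLE statement.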
diff --git a/tests/integration/test_s3_plain_rewritable/test.py b/tests/integration/test_s3_plain_rewritable/test.py index 5e27a690f1f..4b1aaafc814 100644 --- a/tests/integration/test_s3_plain_rewritable/test.py +++ b/tests/integration/test_s3_plain_rewritable/test.py @@ -1,28 +1,41 @@ import pytest import random import string +import threading from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=["configs/storage_conf.xml"], - with_minio=True, - stay_alive=True, -) -insert_values = [ - "(0,'data'),(1,'data')", - ",".join( +NUM_WORKERS = 5 +MAX_ROWS = 1000 + + +def gen_insert_values(size): + return ",".join( f"({i},'{''.join(random.choices(string.ascii_lowercase, k=5))}')" - for i in range(10) - ), -] + for i in range(size) + ) + + +insert_values = ",".join( + f"({i},'{''.join(random.choices(string.ascii_lowercase, k=5))}')" for i in range(10) +) @pytest.fixture(scope="module", autouse=True) def start_cluster(): + for i in range(NUM_WORKERS): + cluster.add_instance( + f"node{i + 1}", + main_configs=["configs/storage_conf.xml"], + with_minio=True, + env_variables={"ENDPOINT_SUBPATH": f"node{i + 1}"}, + stay_alive=True, + # Override ENDPOINT_SUBPATH. + instance_env_variables=i > 0, + ) + try: cluster.start() yield cluster @@ -30,49 +43,105 @@ def start_cluster(): cluster.shutdown() -@pytest.mark.order(0) -def test_insert(): - for index, value in enumerate(insert_values): +@pytest.mark.parametrize( + "storage_policy", + [ + pytest.param("s3_plain_rewritable"), + pytest.param("cache_s3_plain_rewritable"), + ], +) +def test(storage_policy): + def create_insert(node, insert_values): node.query( """ - CREATE TABLE test_{} ( + CREATE TABLE test ( id Int64, data String ) ENGINE=MergeTree() ORDER BY id - SETTINGS storage_policy='s3_plain_rewritable' + SETTINGS storage_policy='{}' """.format( - index + storage_policy ) ) + node.query("INSERT INTO test VALUES {}".format(insert_values)) - node.query("INSERT INTO test_{} VALUES {}".format(index, value)) + insert_values_arr = [ + gen_insert_values(random.randint(1, MAX_ROWS)) for _ in range(0, NUM_WORKERS) + ] + threads = [] + assert len(cluster.instances) == NUM_WORKERS + for i in range(NUM_WORKERS): + node = cluster.instances[f"node{i + 1}"] + t = threading.Thread(target=create_insert, args=(node, insert_values_arr[i])) + threads.append(t) + t.start() + + for t in threads: + t.join() + + for i in range(NUM_WORKERS): + node = cluster.instances[f"node{i + 1}"] assert ( - node.query("SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index)) - == value + node.query("SELECT * FROM test ORDER BY id FORMAT Values") + == insert_values_arr[i] ) - -@pytest.mark.order(1) -def test_restart(): - for index, value in enumerate(insert_values): + for i in range(NUM_WORKERS): + node = cluster.instances[f"node{i + 1}"] + node.query("ALTER TABLE test MODIFY SETTING old_parts_lifetime = 59") assert ( - node.query("SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index)) - == value - ) - node.restart_clickhouse() - - for index, value in enumerate(insert_values): - assert ( - node.query("SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index)) - == value + node.query( + "SELECT engine_full from system.tables WHERE database = currentDatabase() AND name = 'test'" + ).find("old_parts_lifetime = 59") + != -1 ) + node.query("ALTER TABLE test RESET SETTING old_parts_lifetime") + assert ( + node.query( + "SELECT engine_full from system.tables WHERE database = currentDatabase() 
AND name = 'test'" + ).find("old_parts_lifetime") + == -1 + ) + node.query("ALTER TABLE test MODIFY COMMENT 'new description'") + assert ( + node.query( + "SELECT comment from system.tables WHERE database = currentDatabase() AND name = 'test'" + ).find("new description") + != -1 + ) -@pytest.mark.order(2) -def test_drop(): - for index, value in enumerate(insert_values): - node.query("DROP TABLE IF EXISTS test_{} SYNC".format(index)) + insert_values_arr = [] + for i in range(NUM_WORKERS): + node = cluster.instances[f"node{i + 1}"] + insert_values_arr.append( + node.query("SELECT * FROM test ORDER BY id FORMAT Values") + ) + + def restart(node): + node.restart_clickhouse() + + threads = [] + for i in range(NUM_WORKERS): + node = cluster.instances[f"node{i + 1}"] + t = threading.Thread(target=restart, args=(node,)) + threads.append(t) + t.start() + + for t in threads: + t.join() + + for i in range(NUM_WORKERS): + node = cluster.instances[f"node{i + 1}"] + assert ( + node.query("SELECT * FROM test ORDER BY id FORMAT Values") + == insert_values_arr[i] + ) + + for i in range(NUM_WORKERS): + node = cluster.instances[f"node{i + 1}"] + node.query("DROP TABLE IF EXISTS test SYNC") it = cluster.minio_client.list_objects( cluster.minio_bucket, "data/", recursive=True diff --git a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml index 24c1eb29fbc..84e91495304 100644 --- a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml +++ b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/proxy_list.xml @@ -2,7 +2,6 @@ http://proxy1 - http://proxy2 diff --git a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml index 94ac83b32ac..1d31272a395 100644 --- a/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_s3_storage_conf_new_proxy/configs/config.d/storage_conf.xml @@ -3,7 +3,7 @@ s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/data/s3 minio minio123 diff --git a/tests/integration/test_s3_storage_conf_new_proxy/test.py b/tests/integration/test_s3_storage_conf_new_proxy/test.py index c98eb05a217..3b3b07aaa09 100644 --- a/tests/integration/test_s3_storage_conf_new_proxy/test.py +++ b/tests/integration/test_s3_storage_conf_new_proxy/test.py @@ -3,6 +3,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster +import helpers.s3_url_proxy_tests_util as proxy_util @pytest.fixture(scope="module") @@ -26,41 +27,8 @@ def cluster(): cluster.shutdown() -def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET"}): - for i in range(10): - logs = cluster.get_container_logs(proxy_instance) - # Check with retry that all possible interactions with Minio are present - for http_method in http_methods: - if logs.find(http_method + " http://minio1") >= 0: - return - time.sleep(1) - else: - assert False, f"{http_methods} method not found in logs of {proxy_instance}" - - @pytest.mark.parametrize("policy", ["s3"]) def test_s3_with_proxy_list(cluster, policy): - node = cluster.instances["node"] - - node.query( - """ - CREATE TABLE s3_test ( - id Int64, - data String - ) ENGINE=MergeTree() - ORDER BY id - SETTINGS storage_policy='{}' - """.format( - policy - ) + proxy_util.simple_storage_test( + cluster, cluster.instances["node"], ["proxy1"], policy ) - 
node.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") - assert ( - node.query("SELECT * FROM s3_test order by id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - node.query("DROP TABLE IF EXISTS s3_test SYNC") - - for proxy in ["proxy1", "proxy2"]: - check_proxy_logs(cluster, proxy, ["PUT", "GET"]) diff --git a/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml index 132eac4a2a6..73e7e8175c5 100644 --- a/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_s3_storage_conf_proxy/configs/config.d/storage_conf.xml @@ -3,7 +3,7 @@ s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/data/s3 minio minio123 @@ -13,9 +13,10 @@ s3 - http://minio1:9001/root/data/ + http://minio1:9001/root/data/s3_with_resolver minio minio123 + true + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 + +<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 --> + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh new file mode 100755 index 00000000000..14f28f1ba4a --- /dev/null +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Data preparation step +USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +UNIX_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" +DATA_FILE_DOS_ENDINGS="${USER_FILES_PATH:?}/${DOS_ENDINGS}" + +touch $DATA_FILE_UNIX_ENDINGS +touch $DATA_FILE_DOS_ENDINGS + +echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t34\n1971-72_Utah_Stars_season\t2016-10-01\t1\n" > $DATA_FILE_UNIX_ENDINGS +echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS + +echo -e "<-- Read UNIX endings -->\n" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" + +echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" + +# Test teardown +rm $DATA_FILE_UNIX_ENDINGS +rm $DATA_FILE_DOS_ENDINGS diff --git a/tests/queries/0_stateless/02982_create_mv_inner_extra.reference b/tests/queries/0_stateless/02982_create_mv_inner_extra.reference new file mode 100644 index 00000000000..06a60436b2f --- /dev/null +++ b/tests/queries/0_stateless/02982_create_mv_inner_extra.reference @@ -0,0 +1,5 @@ +CREATE TABLE x (`key` String) ENGINE = MergeTree PRIMARY KEY key ORDER BY key 
SETTINGS index_granularity = 8192 +CREATE TABLE x (`key` String) ENGINE = MergeTree PRIMARY KEY tuple(key) ORDER BY tuple(key) SETTINGS index_granularity = 8192 +CREATE TABLE x (`key` String) ENGINE = Null +CREATE TABLE x (`key` String, INDEX idx key TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE x (`key` String, PROJECTION p (SELECT uniqCombined(key))) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02982_create_mv_inner_extra.sql b/tests/queries/0_stateless/02982_create_mv_inner_extra.sql new file mode 100644 index 00000000000..372d61c1683 --- /dev/null +++ b/tests/queries/0_stateless/02982_create_mv_inner_extra.sql @@ -0,0 +1,58 @@ +-- Tags: no-random-merge-tree-settings + +DROP TABLE IF EXISTS data; +DROP TABLE IF EXISTS mv_indexes; +DROP TABLE IF EXISTS mv_no_indexes; +DROP TABLE IF EXISTS mv_projections; +DROP TABLE IF EXISTS mv_primary_key; +DROP TABLE IF EXISTS mv_primary_key_from_column; + +CREATE TABLE data +( + key String, +) +ENGINE = MergeTree +ORDER BY key; + +CREATE MATERIALIZED VIEW mv_indexes +( + key String, + INDEX idx key TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY key +AS SELECT * FROM data; + +CREATE MATERIALIZED VIEW mv_no_indexes +( + key String, + INDEX idx key TYPE bloom_filter GRANULARITY 1 +) +ENGINE = Null +AS SELECT * FROM data; + +CREATE MATERIALIZED VIEW mv_projections +( + key String, + projection p (SELECT uniqCombined(key)) +) +ENGINE = MergeTree +ORDER BY key +AS SELECT * FROM data; + +CREATE MATERIALIZED VIEW mv_primary_key +( + key String, + PRIMARY KEY key +) +ENGINE = MergeTree +AS SELECT * FROM data; + +CREATE MATERIALIZED VIEW mv_primary_key_from_column +( + key String PRIMARY KEY +) +ENGINE = MergeTree +AS SELECT * FROM data; + +SELECT replaceRegexpOne(create_table_query, 'CREATE TABLE [^ ]*', 'CREATE TABLE x') FROM system.tables WHERE database = currentDatabase() and table LIKE '.inner%' ORDER BY 1 FORMAT LineAsString; diff --git a/tests/queries/0_stateless/02984_form_format.reference b/tests/queries/0_stateless/02984_form_format.reference new file mode 100644 index 00000000000..34612dbbb50 --- /dev/null +++ b/tests/queries/0_stateless/02984_form_format.reference @@ -0,0 +1,90 @@ +42 Hello, World! +Hello, World! +col1 Nullable(Int64) +col2 Nullable(String) +col3 Nullable(String) +42 Hello, World! 
[1, 2, 3] +Row 1: +────── +c.e: ls7xfkpm +c.tti.m: raf +rt.start: navigation +rt.bmr: 390,11,10 +rt.tstart: 1707076768666 +rt.bstart: 1707076769091 +rt.blstart: 1707076769056 +rt.end: 1707076769078 +t_resp: 296 +t_page: 116 +t_done: 412 +t_other: boomerang|6,boomr_fb|425,boomr_ld|390,boomr_lat|35 +rt.tt: 2685 +rt.obo: 0 +pt.fcp: 407 +nt_nav_st: 1707076768666 +nt_dns_st: 1707076768683 +nt_dns_end: 1707076768684 +nt_con_st: 1707076768684 +nt_con_end: 1707076768850 +nt_req_st: 1707076768850 +nt_res_st: 1707076768962 +nt_res_end: 1707076768962 +nt_domloading: 1707076769040 +nt_domint: 1707076769066 +nt_domcontloaded_st: 1707076769067 +nt_domcontloaded_end: 1707076769068 +nt_domcomp: 1707076769069 +nt_load_st: 1707076769069 +nt_load_end: 1707076769078 +nt_unload_st: 1707076769040 +nt_unload_end: 1707076769041 +nt_ssl_st: 1707076768788 +nt_enc_size: 3209 +nt_dec_size: 10093 +nt_trn_size: 3940 +nt_protocol: h2 +nt_red_cnt: 0 +nt_nav_type: 1 +restiming: {"https://www.basicrum.com/":{"publications/":"6,88,88,54,54,3e,i,i,h*12h5,kb,5b8","assets/js/":{"just-the-docs.js":"3am,e,e*12pc,_,8oj*20","boomerang-1.737.60.cutting-edge.min.js":"2au,b,a*1pu3,_,1m19*21*42","vendor/lunr.min.js":"3am,d,8*16t2,_,fym*20"}}} +u: https://www.basicrum.com/publications/ +r: https://www.basicrum.com/cost-analyses/ +v: 1.737.60 +sv: 14 +sm: p +rt.si: dd0c542f-7adf-4310-830a-6c0a3d157c90-s8cjr1 +rt.ss: 1707075325294 +rt.sl: 4 +vis.st: visible +ua.plt: Linux x86_64 +ua.vnd: +pid: 8fftz949 +n: 1 +c.t.fps: 07*4*65*j*61 +c.t.busy: 2*4*0034 +c.tti.vr: 408 +c.tti: 408 +c.b: 2 +c.f: 60 +c.f.d: 2511 +c.f.m: 1 +c.f.s: ls7xfl1h +dom.res: 5 +dom.doms: 1 +mem.lsln: 0 +mem.ssln: 0 +mem.lssz: 2 +mem.sssz: 2 +scr.xy: 1920x1200 +scr.bpp: 24/24 +scr.orn: 0/landscape-primary +cpu.cnc: 16 +dom.ln: 114 +dom.sz: 10438 +dom.ck: 157 +dom.img: 0 +dom.script: 6 +dom.script.ext: 3 +dom.iframe: 0 +dom.link: 4 +dom.link.css: 1 +sb: 1 diff --git a/tests/queries/0_stateless/02984_form_format.sh b/tests/queries/0_stateless/02984_form_format.sh new file mode 100755 index 00000000000..ce5feb60130 --- /dev/null +++ b/tests/queries/0_stateless/02984_form_format.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# Test setup +USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME="data.tmp" +FORM_DATA="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/${FILE_NAME}" +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +touch $FORM_DATA + +# Simple tests +echo -ne "col1=42&col2=Hello%2C%20World%21" > $FORM_DATA +$CLICKHOUSE_CLIENT -q "SELECT * from file('$FORM_DATA', Form, 'col1 UInt64, col2 String')" +$CLICKHOUSE_CLIENT -q "SELECT * from file('$FORM_DATA', Form, 'col2 String')" +rm $FORM_DATA + +# Schema reader test +touch $FORM_DATA +echo -ne "col1=42&col2=Hello%2C%20World%21&col3=%5B1%2C%202%2C%203%5D" > $FORM_DATA +$CLICKHOUSE_CLIENT -q "DESC file('$FORM_DATA', Form)" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FORM_DATA', Form)" +rm $FORM_DATA + +# Test with data-raw from request +touch $FORM_DATA +echo -ne "c.e=ls7xfkpm&c.tti.m=raf&rt.start=navigation&rt.bmr=390%2C11%2C10&rt.tstart=1707076768666&rt.bstart=1707076769091&rt.blstart=1707076769056&rt.end=1707076769078&t_resp=296&t_page=116&t_done=412&t_other=boomerang%7C6%2Cboomr_fb%7C425%2Cboomr_ld%7C390%2Cboomr_lat%7C35&rt.tt=2685&rt.obo=0&pt.fcp=407&nt_nav_st=1707076768666&nt_dns_st=1707076768683&nt_dns_end=1707076768684&nt_con_st=1707076768684&nt_con_end=1707076768850&nt_req_st=1707076768850&nt_res_st=1707076768962&nt_res_end=1707076768962&nt_domloading=1707076769040&nt_domint=1707076769066&nt_domcontloaded_st=1707076769067&nt_domcontloaded_end=1707076769068&nt_domcomp=1707076769069&nt_load_st=1707076769069&nt_load_end=1707076769078&nt_unload_st=1707076769040&nt_unload_end=1707076769041&nt_ssl_st=1707076768788&nt_enc_size=3209&nt_dec_size=10093&nt_trn_size=3940&nt_protocol=h2&nt_red_cnt=0&nt_nav_type=1&restiming=%7B%22https%3A%2F%2Fwww.basicrum.com%2F%22%3A%7B%22publications%2F%22%3A%226%2C88%2C88%2C54%2C54%2C3e%2Ci%2Ci%2Ch*12h5%2Ckb%2C5b8%22%2C%22assets%2Fjs%2F%22%3A%7B%22just-the-docs.js%22%3A%223am%2Ce%2Ce*12pc%2C_%2C8oj*20%22%2C%22boomerang-1.737.60.cutting-edge.min.js%22%3A%222au%2Cb%2Ca*1pu3%2C_%2C1m19*21*42%22%2C%22vendor%2Flunr.min.js%22%3A%223am%2Cd%2C8*16t2%2C_%2Cfym*20%22%7D%7D%7D&u=https%3A%2F%2Fwww.basicrum.com%2Fpublications%2F&r=https%3A%2F%2Fwww.basicrum.com%2Fcost-analyses%2F&v=1.737.60&sv=14&sm=p&rt.si=dd0c542f-7adf-4310-830a-6c0a3d157c90-s8cjr1&rt.ss=1707075325294&rt.sl=4&vis.st=visible&ua.plt=Linux%20x86_64&ua.vnd=&pid=8fftz949&n=1&c.t.fps=07*4*65*j*61&c.t.busy=2*4*0034&c.tti.vr=408&c.tti=408&c.b=2&c.f=60&c.f.d=2511&c.f.m=1&c.f.s=ls7xfl1h&dom.res=5&dom.doms=1&mem.lsln=0&mem.ssln=0&mem.lssz=2&mem.sssz=2&scr.xy=1920x1200&scr.bpp=24%2F24&scr.orn=0%2Flandscape-primary&cpu.cnc=16&dom.ln=114&dom.sz=10438&dom.ck=157&dom.img=0&dom.script=6&dom.script.ext=3&dom.iframe=0&dom.link=4&dom.link.css=1&sb=1" > $FORM_DATA +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FORM_DATA', Form) FORMAT Vertical" + +rm $FORM_DATA \ No newline at end of file diff --git a/tests/queries/0_stateless/02992_analyzer_group_by_const.reference b/tests/queries/0_stateless/02992_analyzer_group_by_const.reference index ff61ab0a515..ea9492581c9 100644 --- a/tests/queries/0_stateless/02992_analyzer_group_by_const.reference +++ b/tests/queries/0_stateless/02992_analyzer_group_by_const.reference @@ -4,3 +4,5 @@ a|x String, Const(size = 1, String(size = 1)) String, Const(size = 1, String(size = 1)) 5128475243952187658 +0 0 +0 0 diff --git 
a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql index f30a49887c7..ede6e0deed9 100644 --- a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql +++ b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql @@ -10,3 +10,23 @@ select dumpColumnStructure('x') GROUP BY 'x'; select dumpColumnStructure('x'); -- from https://github.com/ClickHouse/ClickHouse/pull/60046 SELECT cityHash64('limit', _CAST(materialize('World'), 'LowCardinality(String)')) FROM system.one GROUP BY GROUPING SETS ('limit'); + +WITH ( + SELECT dummy AS x + FROM system.one + ) AS y +SELECT + y, + min(dummy) +FROM remote('127.0.0.{1,2}', system.one) +GROUP BY y; + +WITH ( + SELECT dummy AS x + FROM system.one + ) AS y +SELECT + y, + min(dummy) +FROM remote('127.0.0.{2,3}', system.one) +GROUP BY y; diff --git a/tests/queries/0_stateless/02995_forget_partition.sh b/tests/queries/0_stateless/02995_forget_partition.sh index 8ece8d3ddb3..c22f5829130 100755 --- a/tests/queries/0_stateless/02995_forget_partition.sh +++ b/tests/queries/0_stateless/02995_forget_partition.sh @@ -17,7 +17,10 @@ create table forget_partition ) engine = ReplicatedMergeTree('/test/02995/{database}/rmt', '1') order by (k, d) -partition by toYYYYMMDD(d); +partition by toYYYYMMDD(d) +-- Reduce max_merge_selecting_sleep_ms and max_cleanup_delay_period to speed up the part being dropped from memory (RMT) +-- Same with old_parts_lifetime for SMT +SETTINGS old_parts_lifetime=5, merge_selecting_sleep_ms=1000, max_merge_selecting_sleep_ms=5000, cleanup_delay_period=3, max_cleanup_delay_period=5; insert into forget_partition select number, '2024-01-01' + interval number day, randomString(20) from system.numbers limit 10; @@ -26,7 +29,7 @@ alter table forget_partition drop partition '20240102'; """ # DROP PARTITION do not wait for a part to be removed from memory due to possible concurrent SELECTs, so we have to do wait manually here -while [[ $(${CLICKHOUSE_CLIENT} -q "select count() from system.parts where database=currentDatabase() and table='forget_partition' and partition='20240101'") != 0 ]]; do sleep 0.1; done +while [[ $(${CLICKHOUSE_CLIENT} -q "select count() from system.parts where database=currentDatabase() and table='forget_partition' and partition IN ('20240101', '20240102')") != 0 ]]; do sleep 1; done ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ set allow_unrestricted_reads_from_keeper=1; diff --git a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql index 2dfc8094115..f902f191cb7 100644 --- a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql +++ b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql @@ -11,6 +11,6 @@ SET max_block_size = 1, min_insert_block_size_rows = 0, min_insert_block_size_by INSERT INTO too_many_parts SELECT * FROM numbers_mt(100); SELECT count() FROM too_many_parts; -INSERT INTO too_many_parts SELECT * FROM numbers_mt(10); -- { serverError 252 } +INSERT INTO too_many_parts SELECT * FROM numbers_mt(10); -- { serverError TOO_MANY_PARTS } DROP TABLE too_many_parts; diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.reference b/tests/queries/0_stateless/02998_native_parquet_reader.reference new file mode 100644 index 00000000000..38dd9f02b8b --- /dev/null +++ b/tests/queries/0_stateless/02998_native_parquet_reader.reference @@ -0,0 +1,2000 @@ +103002316 1646595280 hsn 
dxj wrm 1987-05-04 19:24:06.618814000 1938-12-29 14:03:44.995783000 -10371879330867684414581450918534810.916 +1548158706 -1216519640 rdx xsn sey 1942-04-24 07:22:01.629877000 1963-05-31 09:22:00.597388000 9453586064049253908450649688266944.965 +-1409329494 215463808 ytf idx grm 1964-04-02 15:32:10.118860000 1987-11-29 12:21:52.464881000 20816451926961221076776482452655249.999 +-1328718984 215070586 toj ykf sny 1962-12-25 19:25:23.099217000 1987-09-25 08:35:27.030011000 3827321890837346905750385640207228.295 +116634012 1646857428 eyt toa fau 1991-01-19 01:53:43.664105000 1939-01-24 15:34:19.169731000 -18887700638539487859059986313378243.468 +1454047574 -1216257492 ugb eyk sni 1976-06-16 10:05:27.546241000 1963-07-22 12:23:08.945284000 13732428483989527846770435107306259.709 +1534658084 -1216650714 pkv lgb oau 1938-08-21 01:37:41.671559000 1963-05-18 08:36:43.510414000 18010946380424307642205251259343751.221 +-1315087288 215332734 aug pbv bvh 1966-09-11 01:55:00.144508000 1987-10-21 10:06:01.203959000 16579148515724197427732763209849308.959 +22522880 214677364 hcw alg oju 1952-02-07 10:06:44.026644000 1939-03-17 18:35:27.517627000 -14608858218599213920740200894338928.724 +103133390 1646726354 lwr hcn bvq 1987-05-17 20:09:23.705788000 1939-01-11 14:49:02.082757000 -10330340322164434125305384742301437.212 +1548289780 -1216388566 vhc cwr gbv 1942-05-07 08:07:18.716851000 1963-06-13 10:07:17.684362000 9495125072752504197726715864500318.669 +-1409198420 -1217043936 dxj mhc kvq \N 1987-12-12 13:07:09.551855000 20857990935664471366052548628888623.703 +-1328587910 215201660 xsn doj wrd 1964-04-15 16:17:27.205834000 1987-10-08 09:20:44.116985000 3868860899540597195026451816440601.999 +116765086 1646988502 idx xse cwi 1963-01-07 20:10:40.186191000 1939-02-06 16:19:36.256705000 -18846161629836237569783920137144869.764 +1454178648 1646333132 ykf ido wrm 1991-02-01 02:39:00.751079000 1938-12-29 14:03:44.995783000 13773967492692778136046501283539633.413 +1534789158 -1216519640 toa pkf sey 1976-06-29 10:50:44.633215000 1963-05-31 09:22:00.597388000 18052485389127557931481317435577124.925 +-1314956214 215463808 eyk tfa xje 1938-09-03 02:22:58.758533000 1987-11-03 10:51:18.290933000 16620687524427447717008829386082682.663 +22653954 214808438 lgb epk sny 1966-09-24 02:40:17.231482000 1987-09-25 08:35:27.030011000 -14567319209895963631464134718105555.02 +103264464 1646857428 pbv lgr fau 1952-02-20 10:52:01.113618000 1939-01-24 15:34:19.169731000 -10288801313461183836029318566068063.508 +1548420854 -1216257492 alg gbv kfa 1987-05-30 20:54:40.792762000 1963-06-26 10:52:34.771336000 9536664081455754487002782040733692.373 +-1409067346 -1216912862 hcn qlg oau 1942-05-20 08:52:35.803825000 1963-05-18 08:36:43.510414000 20899529944367721655328614805121997.407 +-1328456836 215332734 cwr hsn bvh 1964-04-28 17:02:44.292808000 1987-10-21 10:06:01.203959000 3910399908243847484302517992673975.703 +116896160 214677364 mhc cwi gbm 1963-01-20 20:55:57.273165000 1939-02-19 17:04:53.343679000 -18804622621132987280507853960911496.06 +1454309722 1646464206 doj mhs bvq 1991-02-14 03:24:17.838053000 1939-01-11 14:49:02.082757000 13815506501396028425322567459773007.117 +1534920232 -1216388566 xse toj gbv 1976-07-12 11:36:01.720189000 1963-06-13 10:07:17.684362000 18094024397830808220757383611810498.629 +-1314825140 -1217043936 ido xje cni 1938-09-16 03:08:15.845507000 1987-11-16 11:36:35.377907000 16662226533130698006284895562316056.367 +22785028 214939512 pkf ito wrd 1966-10-07 03:25:34.318456000 1987-10-08 09:20:44.116985000 
-14525780201192713342188068541872181.316 +103395538 1646988502 tfa pkv cwi 1952-03-04 11:37:18.200592000 1939-02-06 16:19:36.256705000 -10247262304757933546753252389834689.804 +1548551928 1646333132 epk kfa oje 1987-06-12 21:39:57.879736000 1963-07-09 11:37:51.858310000 9578203090159004776278848216967066.077 +-1408936272 -1216781788 lgr upk sey 1942-06-02 09:37:52.890799000 1963-05-31 09:22:00.597388000 20941068953070971944604680981355371.111 +-1328325762 215463808 gbv lwr xje 1964-05-11 17:48:01.379782000 1987-11-03 10:51:18.290933000 3951938916947097773578584168907349.407 +117027234 214808438 qlg gbm kfq 1963-02-02 21:41:14.360139000 1939-03-04 17:50:10.430653000 -18763083612429736991231787784678122.356 +1454440796 1646595280 hsn qlw fau 1991-02-27 04:09:34.925027000 1939-01-24 15:34:19.169731000 13857045510099278714598633636006380.821 +1535051306 -1216257492 cwi xsn kfa 1976-07-25 12:21:18.807163000 1963-06-26 10:52:34.771336000 18135563406534058510033449788043872.333 +-1314694066 -1216912862 mhs cni grm 1938-09-29 03:53:32.932481000 1987-11-29 12:21:52.464881000 16703765541833948295560961738549430.071 +22916102 215070586 toj mxs bvh 1966-10-20 04:10:51.405430000 1987-10-21 10:06:01.203959000 -14484241192489463052912002365638807.612 +103526612 214677364 xje toa gbm 1952-03-17 12:22:35.287566000 1939-02-19 17:04:53.343679000 -10205723296054683257477186213601316.1 +1548683002 1646464206 ito oje sni 1987-06-25 22:25:14.966710000 1963-07-22 12:23:08.945284000 9619742098862255065554914393200439.781 +-1408805198 -1216650714 pkv yto gbv 1942-06-15 10:23:09.977773000 1963-06-13 10:07:17.684362000 20982607961774222233880747157588744.815 +-1328194688 -1217043936 kfa pbv cni 1964-05-24 18:33:18.466755000 1987-11-16 11:36:35.377907000 3993477925650348062854650345140723.111 +117158308 214939512 upk kfq oju 1963-02-15 22:26:31.447112000 1939-03-17 18:35:27.517627000 -18721544603726486701955721608444748.652 +1454571870 1646726354 lwr upb cwi 1991-03-12 04:54:52.012001000 1939-02-06 16:19:36.256705000 13898584518802529003874699812239754.525 +1535182380 1646333132 gbm cwr oje 1976-08-07 13:06:35.894137000 1963-07-09 11:37:51.858310000 18177102415237308799309515964277246.037 +-1314562992 -1216781788 qlw grm kvq 1938-10-12 04:38:50.019455000 1987-12-12 13:07:09.551855000 16745304550537198584837027914782803.775 +23047176 215201660 xsn qcw xje 1966-11-02 04:56:08.492404000 1987-11-03 10:51:18.290933000 -14442702183786212763635936189405433.908 +103657686 214808438 cni xse kfq 1952-03-30 13:07:52.374540000 1939-03-04 17:50:10.430653000 -10164184287351432968201120037367942.396 +1548814076 1646595280 mxs sni wrm 1987-07-08 23:10:32.053684000 1938-12-29 14:03:44.995783000 9661281107565505354830980569433813.485 +-1408674124 -1216519640 toa dxs kfa 1942-06-28 11:08:27.064746000 1963-06-26 10:52:34.771336000 21024146970477472523156813333822118.519 +-1328063614 -1216912862 oje tfa grm 1964-06-06 19:18:35.553729000 1987-11-29 12:21:52.464881000 4035016934353598352130716521374096.815 +117289382 215070586 yto oju sny 1963-02-28 23:11:48.534086000 1987-09-25 08:35:27.030011000 -18680005595023236412679655432211374.948 +1454702944 1646857428 pbv ytf gbm 1991-03-25 05:40:09.098975000 1939-02-19 17:04:53.343679000 13940123527505779293150765988473128.229 +1535313454 1646464206 kfq gbv sni 1976-08-20 13:51:52.981111000 1963-07-22 12:23:08.945284000 18218641423940559088585582140510619.741 +-1314431918 -1216650714 upb \N oau 1938-10-25 05:24:07.106429000 1963-05-18 08:36:43.510414000 16786843559240448874113094091016177.479 
+23178250 215332734 cwr kvq cni 1966-11-15 05:41:25.579378000 1987-11-16 11:36:35.377907000 -14401163175082962474359870013172060.204 +103788760 214939512 grm ugb oju 1952-04-12 13:53:09.461514000 1939-03-17 18:35:27.517627000 -10122645278648182678925053861134568.692 +1548945150 1646726354 qcw cwi bvq 1987-07-21 23:55:49.140658000 1939-01-11 14:49:02.082757000 9702820116268755644107046745667187.189 +-1408543050 -1216388566 xse wrm oje 1942-07-11 11:53:44.151720000 1963-07-09 11:37:51.858310000 21065685979180722812432879510055492.223 +-1327932540 -1216781788 sni hcw kvq 1964-06-19 20:03:52.640703000 1987-12-12 13:07:09.551855000 4076555943056848641406782697607470.519 +117420456 215201660 dxs xje wrd 1963-03-13 23:57:05.621060000 1987-10-08 09:20:44.116985000 -18638466586319986123403589255978001.244 +1454834018 1646988502 tfa sny kfq 1991-04-07 06:25:26.185949000 1939-03-04 17:50:10.430653000 13981662536209029582426832164706501.933 +1535444528 1646595280 oju dxj wrm 1976-09-02 14:37:10.068085000 1938-12-29 14:03:44.995783000 18260180432643809377861648316743993.445 +-1314300844 -1216519640 ytf kfa sey 1938-11-07 06:09:24.193403000 1963-05-31 09:22:00.597388000 16828382567943699163389160267249551.183 +23309324 215463808 gbv oau grm 1966-11-28 06:26:42.666352000 1987-11-29 12:21:52.464881000 -14359624166379712185083803836938686.5 +103919834 215070586 kvq ykf sny 1952-04-25 14:38:26.548488000 1987-09-25 08:35:27.030011000 -10081106269944932389648987684901194.988 +1549076224 1646857428 ugb gbm fau 1987-08-04 00:41:06.227632000 1939-01-24 15:34:19.169731000 9744359124972005933383112921900560.893 +-1408411976 -1216257492 cwi bvq sni 1942-07-24 12:39:01.238694000 1963-07-22 12:23:08.945284000 21107224987883973101708945686288865.927 +-1327801466 -1216650714 wrm lgb oau 1964-07-02 20:49:09.727677000 1963-05-18 08:36:43.510414000 4118094951760098930682848873840844.223 +117551530 215332734 hcw cni bvh 1963-03-27 00:42:22.708034000 1987-10-21 10:06:01.203959000 -18596927577616735834127523079744627.54 +1454965092 214677364 xje wrd oju 1991-04-20 07:10:43.272923000 1939-03-17 18:35:27.517627000 14023201544912279871702898340939875.637 +1535575602 1646726354 sny hcn bvq 1976-09-15 15:22:27.155059000 1939-01-11 14:49:02.082757000 18301719441347059667137714492977367.149 +-1314169770 -1216388566 dxj oje gbv 1938-11-20 06:54:41.280377000 1963-06-13 10:07:17.684362000 16869921576646949452665226443482924.887 +23440398 -1217043936 kfa sey kvq 1966-12-11 07:11:59.753326000 1987-12-12 13:07:09.551855000 -14318085157676461895807737660705312.796 +104050908 215201660 oau doj wrd 1952-05-08 15:23:43.635462000 1987-10-08 09:20:44.116985000 -10039567261241682100372921508667821.284 +1549207298 1646988502 ykf kfq cwi 1987-08-17 01:26:23.314606000 1939-02-06 16:19:36.256705000 9785898133675256222659179098133934.597 +-1408280902 1646333132 gbm fau wrm 1942-08-06 13:24:18.325668000 1938-12-29 14:03:44.995783000 21148763996587223390985011862522239.631 +-1327670392 -1216519640 bvq pkf sey 1964-07-15 21:34:26.814651000 1963-05-31 09:22:00.597388000 4159633960463349219958915050074217.927 +117682604 215463808 lgb grm xje 1963-04-09 01:27:39.795008000 1987-11-03 10:51:18.290933000 -18555388568913485544851456903511253.836 +1455096166 214808438 cni bvh sny 1991-05-03 07:56:00.359897000 1987-09-25 08:35:27.030011000 14064740553615530160978964517173249.341 +1535706676 1646857428 wrd lgr fau 1976-09-28 16:07:44.242033000 1939-01-24 15:34:19.169731000 18343258450050309956413780669210740.853 +-1314038696 -1216257492 hcn sni kfa 1938-12-03 
07:39:58.367351000 1963-06-26 10:52:34.771336000 16911460585350199741941292619716298.591 +23571472 -1216912862 oje wid oau 1966-12-24 07:57:16.840300000 1963-05-18 08:36:43.510414000 -14276546148973211606531671484471939.092 +104181982 215332734 sey hsn bvh 1952-05-21 16:09:00.722436000 1987-10-21 10:06:01.203959000 -9998028252538431811096855332434447.58 +1549338372 214677364 doj oju gbm 1987-08-30 02:11:40.401579000 1939-02-19 17:04:53.343679000 9827437142378506511935245274367308.301 +-1408149828 1646464206 kfq jey bvq 1942-08-19 14:09:35.412642000 1939-01-11 14:49:02.082757000 21190303005290473680261078038755613.335 +-1327539318 -1216388566 fau toj gbv 1964-07-28 22:19:43.901625000 1963-06-13 10:07:17.684362000 4201172969166599509234981226307591.631 +117813678 -1217043936 pkf kvq cni 1963-04-22 02:12:56.881982000 1987-11-16 11:36:35.377907000 -18513849560210235255575390727277880.132 +1455227240 214939512 grm fal wrd 1991-05-16 08:41:17.446871000 1987-10-08 09:20:44.116985000 14106279562318780450255030693406623.045 +1535837750 1646988502 bvh pkv cwi 1976-10-11 16:53:01.329007000 1939-02-06 16:19:36.256705000 18384797458753560245689846845444114.557 +-1313907622 1646333132 lgr wrm oje 1938-12-16 08:25:15.454325000 1963-07-09 11:37:51.858310000 16952999594053450031217358795949672.295 +23702546 -1216781788 sni \N sey 1967-01-06 08:42:33.927274000 1963-05-31 09:22:00.597388000 -14235007140269961317255605308238565.388 +104313056 215463808 wid bmh xje 1952-06-03 16:54:17.809410000 1987-11-03 10:51:18.290933000 -9956489243835181521820789156201073.876 +1549469446 214808438 hsn lwr kfq 1987-09-12 02:56:57.488553000 1939-03-04 17:50:10.430653000 9868976151081756801211311450600682.005 +-1408018754 1646595280 oju sny fau 1942-09-01 14:54:52.499616000 1939-01-24 15:34:19.169731000 21231842013993723969537144214988987.039 +-1327408244 -1216257492 jey nid kfa 1964-08-10 23:05:00.988599000 1963-06-26 10:52:34.771336000 4242711977869849798511047402540965.335 +117944752 -1216912862 toj xsn grm 1963-05-05 02:58:13.968956000 1987-11-29 12:21:52.464881000 -18472310551506984966299324551044506.428 +1455358314 215070586 kvq oau bvh 1991-05-29 09:26:34.533845000 1987-10-21 10:06:01.203959000 14147818571022030739531096869639996.749 +1535968824 214677364 fal jep gbm 1976-10-24 17:38:18.415981000 1939-02-19 17:04:53.343679000 18426336467456810534965913021677488.261 +-1313776548 1646464206 pkv toa sni 1938-12-29 09:10:32.541299000 1963-07-22 12:23:08.945284000 16994538602756700320493424972183045.999 +23833620 -1216650714 wrm bvq gbv 1967-01-19 09:27:51.014248000 1963-06-13 10:07:17.684362000 -14193468131566711027979539132005191.684 +104444130 -1217043936 bmh fql cni 1952-06-16 17:39:34.896384000 1987-11-16 11:36:35.377907000 -9914950235131931232544722979967700.172 +1549600520 214939512 lwr pbv oju 1987-09-25 03:42:14.575527000 1939-03-17 18:35:27.517627000 \N +-1407887680 1646726354 sny wrd cwi 1942-09-14 15:40:09.586590000 1939-02-06 16:19:36.256705000 9910515159785007090487377626834055.709 +-1327277170 1646333132 nid rmh oje 1964-08-23 23:50:18.075573000 1963-07-09 11:37:51.858310000 5733090138320292352297426736847.527 +118075826 -1216781788 xsn cwr kvq 1963-05-18 03:43:31.055930000 1987-12-12 13:07:09.551855000 4284250986573100087787113578774339.039 +1455489388 215201660 oau sey xje 1991-06-11 10:11:51.620819000 1987-11-03 10:51:18.290933000 -18430771542803734677023258374811132.724 +1536099898 214808438 jep nit kfq 1976-11-06 18:23:35.502955000 1939-03-04 17:50:10.430653000 14189357579725281028807163045873370.453 
+-1313645474 1646595280 toa xse wrm 1939-01-11 09:55:49.628273000 1938-12-29 14:03:44.995783000 18467875476160060824241979197910861.965 +23964694 -1216519640 bvq fau kfa 1967-02-01 10:13:08.101222000 1963-06-26 10:52:34.771336000 17036077611459950609769491148416419.703 +104575204 -1216912862 fql jup grm 1952-06-29 18:24:51.983358000 1987-11-29 12:21:52.464881000 -14151929122863460738703472955771817.98 +1549731594 215070586 pbv tfa sny 1987-10-08 04:27:31.662501000 1987-09-25 08:35:27.030011000 -9873411226428680943268656803734326.468 +-1407756606 1646857428 wrd bvh gbm 1942-09-27 16:25:26.673564000 1939-02-19 17:04:53.343679000 9952054168488257379763443803067429.413 +-1327146096 1646464206 rmh vql sni 1964-09-06 00:35:35.162547000 1963-07-22 12:23:08.945284000 47272098841570581628363602970221.231 +118206900 -1216650714 cwr gbv oau \N 1963-05-18 08:36:43.510414000 4325789995276350377063179755007712.743 +1455620462 215332734 sey wid cni 1963-05-31 04:28:48.142904000 1987-11-16 11:36:35.377907000 -18389232534100484387747192198577759.02 +1536230972 214939512 nit rmx oju 1991-06-24 10:57:08.707793000 1939-03-17 18:35:27.517627000 14230896588428531318083229222106744.157 +-1313514400 1646726354 xse cwi bvq 1976-11-19 19:08:52.589929000 1939-01-11 14:49:02.082757000 18509414484863311113518045374144235.669 +24095768 -1216388566 fau jey oje 1939-01-24 10:41:06.715247000 1963-07-09 11:37:51.858310000 17077616620163200899045557324649793.407 +104706278 -1216781788 jup nyt kvq 1967-02-14 10:58:25.188196000 1987-12-12 13:07:09.551855000 -14110390114160210449427406779538444.276 +1549862668 215201660 tfa xje wrd 1952-07-12 19:10:09.070331000 1987-10-08 09:20:44.116985000 -9831872217725430653992590627500952.764 +-1407625532 1646988502 bvh fal kfq 1987-10-21 05:12:48.749475000 1939-03-04 17:50:10.430653000 9993593177191507669039509979300803.117 +-1327015022 1646595280 vql aup wrm 1942-10-10 17:10:43.760538000 1938-12-29 14:03:44.995783000 88811107544820870904429779203594.935 +118337974 -1216519640 gbv kfa sey 1964-09-19 01:20:52.249521000 1963-05-31 09:22:00.597388000 4367329003979600666339245931241086.447 +1455751536 215463808 wid bmh grm 1963-06-13 05:14:05.229878000 1987-11-29 12:21:52.464881000 -18347693525397234098471126022344385.316 +1536362046 215070586 rmx vqc sny 1991-07-07 11:42:25.794767000 1987-09-25 08:35:27.030011000 14272435597131781607359295398340117.861 +-1313383326 1646857428 cwi gbm fau 1976-12-02 19:54:09.676903000 1939-01-24 15:34:19.169731000 18550953493566561402794111550377609.373 +24226842 -1216257492 jey nid sni 1939-02-06 11:26:23.802221000 1963-07-22 12:23:08.945284000 17119155628866451188321623500883167.111 +104837352 -1216650714 nyt rdx oau 1967-02-27 11:43:42.275169000 1963-05-18 08:36:43.510414000 -14068851105456960160151340603305070.572 +1549993742 215332734 xje cni bvh 1952-07-25 19:55:26.157305000 1987-10-21 10:06:01.203959000 -9790333209022180364716524451267579.06 +-1407494458 214677364 fal jep oju 1987-11-03 05:58:05.836449000 1939-03-17 18:35:27.517627000 10035132185894757958315576155534176.821 +-1326883948 1646726354 aup eyt bvq 1942-10-23 17:56:00.847512000 1939-01-11 14:49:02.082757000 130350116248071160180495955436968.639 +118469048 -1216388566 kfa oje gbv 1964-10-02 02:06:09.336495000 1963-06-13 10:07:17.684362000 4408868012682850955615312107474460.151 +1455882610 -1217043936 bmh fql kvq 1963-06-26 05:59:22.316852000 1987-12-12 13:07:09.551855000 -18306154516693983809195059846111011.612 +1536493120 215201660 vqc aug wrd 1991-07-20 12:27:42.881741000 1987-10-08 
09:20:44.116985000 14313974605835031896635361574573491.565 +-1313252252 1646988502 gbm kfq cwi 1976-12-15 20:39:26.763877000 1939-02-06 16:19:36.256705000 18592492502269811692070177726610983.077 +24357916 1646333132 nid rmh wrm 1939-02-19 12:11:40.889195000 1938-12-29 14:03:44.995783000 17160694637569701477597689677116540.815 +104968426 -1216519640 rdx vhc sey 1967-03-12 12:28:59.362143000 1963-05-31 09:22:00.597388000 -14027312096753709870875274427071696.868 +1550124816 215463808 cni grm xje 1952-08-07 20:40:43.244279000 1987-11-03 10:51:18.290933000 -9748794200318930075440458275034205.356 +-1407363384 214808438 jep nit sny 1987-11-16 06:43:22.923423000 1987-09-25 08:35:27.030011000 10076671194598008247591642331767550.525 +-1326752874 1646857428 eyt idx fau 1942-11-05 18:41:17.934486000 1939-01-24 15:34:19.169731000 171889124951321449456562131670342.343 +118600122 -1216257492 oje sni kfa 1964-10-15 02:51:26.423469000 1963-06-26 10:52:34.771336000 4450407021386101244891378283707833.855 +1456013684 -1216912862 fql jup oau 1963-07-09 06:44:39.403826000 1963-05-18 08:36:43.510414000 -18264615507990733519918993669877637.908 +1536624194 215332734 aug eyk bvh 1991-08-02 13:12:59.968715000 1987-10-21 10:06:01.203959000 14355513614538282185911427750806865.269 +-1313121178 214677364 kfq oju gbm 1976-12-28 21:24:43.850851000 1939-02-19 17:04:53.343679000 18634031510973061981346243902844356.781 +24488990 1646464206 rmh vql bvq 1939-03-04 12:56:57.976169000 1939-01-11 14:49:02.082757000 17202233646272951766873755853349914.519 +105099500 -1216388566 vhc alg gbv 1967-03-25 13:14:16.449117000 1963-06-13 10:07:17.684362000 -13985773088050459581599208250838323.164 +1550255890 -1217043936 grm kvq cni 1952-08-20 21:26:00.331253000 1987-11-16 11:36:35.377907000 -9707255191615679786164392098800831.652 +-1407232310 214939512 nit rmx wrd 1987-11-29 07:28:40.010397000 1987-10-08 09:20:44.116985000 10118210203301258536867708508000924.229 +-1326621800 1646988502 idx mhc cwi 1942-11-18 19:26:35.021460000 1939-02-06 16:19:36.256705000 213428133654571738732628307903716.047 +118731196 1646333132 sni wrm oje 1964-10-28 03:36:43.510443000 1963-07-09 11:37:51.858310000 4491946030089351534167444459941207.559 +1456144758 -1216781788 jup nyt sey 1963-07-22 07:29:56.490800000 1963-05-31 09:22:00.597388000 -18223076499287483230642927493644264.204 +1536755268 215463808 eyk ido xje 1991-08-15 13:58:17.055689000 1987-11-03 10:51:18.290933000 14397052623241532475187493927040238.973 +-1312990104 214808438 oju sny kfq 1977-01-10 22:10:00.937825000 1939-03-04 17:50:10.430653000 18675570519676312270622310079077730.485 +24620064 1646595280 vql aup fau 1939-03-17 13:42:15.063143000 1939-01-24 15:34:19.169731000 17243772654976202056149822029583288.223 +105230574 -1216257492 alg epk kfa 1967-04-07 13:59:33.536091000 1963-06-26 10:52:34.771336000 -13944234079347209292323142074604949.46 +1550386964 -1216912862 kvq oau grm 1952-09-02 22:11:17.418227000 1987-11-29 12:21:52.464881000 -9665716182912429496888325922567457.948 +-1407101236 215070586 rmx vqc bvh 1987-12-12 08:13:57.097371000 1987-10-21 10:06:01.203959000 10159749212004508826143774684234297.933 +-1326490726 214677364 mhc qlg gbm 1942-12-01 20:11:52.108434000 1939-02-19 17:04:53.343679000 254967142357822028008694484137089.751 +118862270 1646464206 wrm bvq sni 1964-11-10 04:22:00.597417000 1963-07-22 12:23:08.945284000 4533485038792601823443510636174581.263 +1456275832 -1216650714 nyt rdx gbv 1963-08-04 08:15:13.577774000 1963-06-13 10:07:17.684362000 
-18181537490584232941366861317410890.5 +1536886342 -1217043936 ido mhs cni 1991-08-28 14:43:34.142663000 1987-11-16 11:36:35.377907000 14438591631944782764463560103273612.677 +-1312859030 214939512 sny wrd oju 1977-01-23 22:55:18.024798000 1939-03-17 18:35:27.517627000 18717109528379562559898376255311104.189 +24751138 1646726354 aup eyt cwi 1939-03-30 14:27:32.150116000 1939-02-06 16:19:36.256705000 17285311663679452345425888205816661.927 +105361648 1646333132 epk ito oje 1967-04-20 14:44:50.623065000 1963-07-09 11:37:51.858310000 -13902695070643959003047075898371575.756 +1550518038 -1216781788 oau sey kvq 1952-09-15 22:56:34.505201000 1987-12-12 13:07:09.551855000 -9624177174209179207612259746334084.244 +-1406970162 215201660 vqc aug xje 1987-12-25 08:59:14.184345000 1987-11-03 10:51:18.290933000 10201288220707759115419840860467671.637 +-1326359652 214808438 qlg upk kfq 1942-12-14 20:57:09.195408000 1939-03-04 17:50:10.430653000 296506151061072317284760660370463.455 +118993344 1646595280 bvq fau wrm 1964-11-23 05:07:17.684391000 1938-12-29 14:03:44.995783000 4575024047495852112719576812407954.967 +1456406906 -1216519640 rdx vhc kfa 1963-08-17 09:00:30.664748000 1963-06-26 10:52:34.771336000 -18139998481880982652090795141177516.796 +1537017416 -1216912862 mhs qlw grm 1991-09-10 15:28:51.229636000 1987-11-29 12:21:52.464881000 14480130640648033053739626279506986.381 +-1312727956 215070586 wrd bvh sny 1977-02-05 23:40:35.111772000 1987-09-25 08:35:27.030011000 18758648537082812849174442431544477.893 +24882212 1646857428 eyt idx gbm 1939-04-12 15:12:49.237090000 1939-02-19 17:04:53.343679000 17326850672382702634701954382050035.631 +105492722 1646464206 ito mxs sni 1967-05-03 15:30:07.710039000 1963-07-22 12:23:08.945284000 -13861156061940708713771009722138202.052 +1550649112 -1216650714 sey wid oau 1952-09-28 23:41:51.592175000 1963-05-18 08:36:43.510414000 -9582638165505928918336193570100710.54 +-1406839088 215332734 aug eyk cni 1988-01-07 09:44:31.271319000 1987-11-16 11:36:35.377907000 10242827229411009404695907036701045.341 +-1326228578 214939512 upk yto oju 1942-12-27 21:42:26.282382000 1939-03-17 18:35:27.517627000 338045159764322606560826836603837.159 +119124418 1646726354 fau jey bvq 1964-12-06 05:52:34.771365000 1939-01-11 14:49:02.082757000 4616563056199102401995642988641328.671 +1456537980 -1216388566 vhc alg oje 1963-08-30 09:45:47.751722000 1963-07-09 11:37:51.858310000 -18098459473177732362814728964944143.092 +1537148490 -1216781788 qlw upb kvq 1991-09-23 16:14:08.316610000 1987-12-12 13:07:09.551855000 14521669649351283343015692455740360.085 +-1312596882 215201660 bvh fal wrd 1977-02-19 00:25:52.198746000 1987-10-08 09:20:44.116985000 18800187545786063138450508607777851.597 +25013286 1646988502 idx mhc kfq 1939-04-25 15:58:06.324064000 1939-03-04 17:50:10.430653000 17368389681085952923978020558283409.335 +105623796 1646595280 mxs qcw wrm 1967-05-16 16:15:24.797013000 1938-12-29 14:03:44.995783000 -13819617053237458424494943545904828.348 +1550780186 -1216519640 wid bmh sey 1952-10-12 00:27:08.679149000 1963-05-31 09:22:00.597388000 -9541099156802678629060127393867336.836 +-1406708014 215463808 eyk ido grm 1988-01-20 10:29:48.358293000 1987-11-29 12:21:52.464881000 10284366238114259693971973212934419.045 +-1326097504 215070586 yto dxs sny 1943-01-09 22:27:43.369356000 1987-09-25 08:35:27.030011000 379584168467572895836893012837210.863 +119255492 1646857428 jey nid fau 1964-12-19 06:37:51.858339000 1939-01-24 15:34:19.169731000 4658102064902352691271709164874702.375 +1456669054 
-1216257492 alg epk sni 1963-09-12 10:31:04.838696000 1963-07-22 12:23:08.945284000 -18056920464474482073538662788710769.388 +1537279564 -1216650714 upb ytf oau 1991-10-06 16:59:25.403584000 1963-05-18 08:36:43.510414000 14563208658054533632291758631973733.789 +-1312465808 215332734 fal jep bvh 1977-03-04 01:11:09.285720000 1987-10-21 10:06:01.203959000 18841726554489313427726574784011225.301 +25144360 214677364 mhc qlg oju 1939-05-08 16:43:23.411038000 1939-03-17 18:35:27.517627000 17409928689789203213254086734516783.039 +105754870 1646726354 qcw ugb bvq 1967-05-29 17:00:41.883987000 1939-01-11 14:49:02.082757000 -13778078044534208135218877369671454.644 +1550911260 -1216388566 bmh fql gbv 1952-10-25 01:12:25.766123000 1963-06-13 10:07:17.684362000 -9499560148099428339784061217633963.132 +-1406576940 -1217043936 ido mhs kvq 1988-02-02 11:15:05.445267000 1987-12-12 13:07:09.551855000 10325905246817509983248039389167792.749 +-1325966430 215201660 dxs hcw wrd 1943-01-22 23:13:00.456330000 1987-10-08 09:20:44.116985000 421123177170823185112959189070584.567 +119386566 1646988502 nid rmh cwi 1965-01-01 07:23:08.945313000 1939-02-06 16:19:36.256705000 4699641073605602980547775341108076.079 +1456800128 1646333132 epk ito wrm 1963-09-25 11:16:21.925670000 1938-12-29 14:03:44.995783000 -18015381455771231784262596612477395.684 +1537410638 -1216519640 ytf dxj sey 1991-10-19 17:44:42.490558000 1963-05-31 09:22:00.597388000 14604747666757783921567824808207107.493 +-1312334734 215463808 jep nit xje 1977-03-17 01:56:26.372694000 1987-11-03 10:51:18.290933000 18883265563192563717002640960244599.005 +25275434 214808438 qlg upk sny 1939-05-21 17:28:40.498012000 1987-09-25 08:35:27.030011000 17451467698492453502530152910750156.743 +105885944 1646857428 ugb ykf fau 1967-06-11 17:45:58.970961000 1939-01-24 15:34:19.169731000 -13736539035830957845942811193438080.94 +1551042334 -1216257492 fql jup kfa 1952-11-07 01:57:42.853097000 1963-06-26 10:52:34.771336000 -9458021139396178050507995041400589.428 +-1406445866 -1216912862 mhs qlw oau 1988-02-15 12:00:22.532241000 1963-05-18 08:36:43.510414000 10367444255520760272524105565401166.453 +-1325835356 215332734 hcw lgb bvh 1943-02-04 23:58:17.543304000 1987-10-21 10:06:01.203959000 462662185874073474389025365303958.271 +119517640 214677364 rmh vql gbm 1965-01-14 08:08:26.032287000 1939-02-19 17:04:53.343679000 4741180082308853269823841517341449.783 +1456931202 1646464206 ito mxs bvq 1963-10-08 12:01:39.012644000 1939-01-11 14:49:02.082757000 -17973842447067981494986530436244021.98 +1537541712 -1216388566 dxj hcn gbv 1991-11-01 18:29:59.577532000 1963-06-13 10:07:17.684362000 14646286675461034210843890984440481.197 +-1312203660 -1217043936 nit rmx cni 1977-03-30 02:41:43.459668000 1987-11-16 11:36:35.377907000 18924804571895814006278707136477972.709 +25406508 214939512 upk yto wrd 1939-06-03 18:13:57.584986000 1987-10-08 09:20:44.116985000 17493006707195703791806219086983530.447 +106017018 1646988502 ykf doj cwi 1967-06-24 18:31:16.057935000 1939-02-06 16:19:36.256705000 -13695000027127707556666745017204707.236 +1551173408 1646333132 jup nyt oje 1952-11-20 02:42:59.940071000 1963-07-09 11:37:51.858310000 -9416482130692927761231928865167215.724 +-1406314792 -1216781788 qlw upb sey 1988-02-28 12:45:39.619215000 1963-05-31 09:22:00.597388000 10408983264224010561800171741634540.157 +-1325704282 215463808 lgb pkf xje 1943-02-18 00:43:34.630278000 1987-11-03 10:51:18.290933000 504201194577323763665091541537331.975 +\N 214808438 vql aup kfq 1965-01-27 08:53:43.119261000 
1939-03-04 17:50:10.430653000 4782719091012103559099907693574823.487 +119648714 1646595280 mxs qcw fau 1963-10-21 12:46:56.099618000 1939-01-24 15:34:19.169731000 -17932303438364731205710464260010648.276 +1457062276 -1216257492 hcn lgr kfa 1991-11-14 19:15:16.664506000 1963-06-26 10:52:34.771336000 14687825684164284500119957160673854.901 +1537672786 -1216912862 rmx vqc grm 1977-04-12 03:27:00.546642000 1987-11-29 12:21:52.464881000 18966343580599064295554773312711346.413 +-1312072586 215070586 yto dxs bvh 1939-06-16 18:59:14.671960000 1987-10-21 10:06:01.203959000 17534545715898954081082285263216904.151 +25537582 214677364 doj \N gbm 1967-07-07 19:16:33.144909000 1939-02-19 17:04:53.343679000 -13653461018424457267390678840971333.532 +106148092 1646464206 nyt hsn sni 1952-12-03 03:28:17.027045000 1963-07-22 12:23:08.945284000 -9374943121989677471955862688933842.02 +1551304482 -1216650714 upb rdx gbv 1988-03-12 13:30:56.706189000 1963-06-13 10:07:17.684362000 10450522272927260851076237917867913.861 +-1406183718 -1217043936 pkf ytf cni 1943-03-03 01:28:51.717252000 1987-11-16 11:36:35.377907000 545740203280574052941157717770705.679 +-1325573208 214939512 aup toj oju 1965-02-09 09:39:00.206235000 1939-03-17 18:35:27.517627000 4824258099715353848375973869808197.191 +119779788 1646726354 qcw eyt cwi 1963-11-03 13:32:13.186592000 1939-02-06 16:19:36.256705000 -17890764429661480916434398083777274.572 +1457193350 1646333132 lgr ugb oje 1991-11-27 20:00:33.751480000 1963-07-09 11:37:51.858310000 14729364692867534789396023336907228.605 +1537803860 -1216781788 vqc pkv kvq 1977-04-25 04:12:17.633616000 1987-12-12 13:07:09.551855000 19007882589302314584830839488944720.117 +-1311941512 215201660 dxs aug xje 1939-06-29 19:44:31.758934000 1987-11-03 10:51:18.290933000 17576084724602204370358351439450277.855 +25668656 214808438 hsn hcw kfq 1967-07-20 20:01:50.231883000 1939-03-04 17:50:10.430653000 -13611922009721206978114612664737959.828 +106279166 1646595280 rdx lwr wrm 1952-12-16 04:13:34.114019000 1938-12-29 14:03:44.995783000 -9333404113286427182679796512700468.316 +1551435556 -1216519640 ytf vhc kfa 1988-03-25 14:16:13.793163000 1963-06-26 10:52:34.771336000 10492061281630511140352304094101287.565 +-1406052644 -1216912862 toj dxj grm 1943-03-16 02:14:08.804226000 1987-11-29 12:21:52.464881000 587279211983824342217223894004079.383 +-1325442134 215070586 eyt xsn sny 1965-02-22 10:24:17.293209000 1987-09-25 08:35:27.030011000 4865797108418604137652040046041570.895 +119910862 1646857428 ugb idx gbm 1963-11-16 14:17:30.273566000 1939-02-19 17:04:53.343679000 -17849225420958230627158331907543900.868 +1457324424 1646464206 pkv ykf sni 1991-12-10 20:45:50.838454000 1963-07-22 12:23:08.945284000 14770903701570785078672089513140602.309 +1537934934 -1216650714 aug toa oau 1977-05-08 04:57:34.720590000 1963-05-18 08:36:43.510414000 19049421598005564874106905665178093.821 +-1311810438 215332734 hcw eyk cni 1939-07-12 20:29:48.845908000 1987-11-16 11:36:35.377907000 17617623733305454659634417615683651.559 +25799730 214939512 lwr lgb oju 1967-08-02 20:47:07.318857000 1939-03-17 18:35:27.517627000 -13570383001017956688838546488504586.124 +106410240 1646726354 vhc pbv bvq 1952-12-29 04:58:51.200993000 1939-01-11 14:49:02.082757000 -9291865104583176893403730336467094.612 +1551566630 -1216388566 dxj alg oje 1988-04-07 15:01:30.880137000 1963-07-09 11:37:51.858310000 10533600290333761429628370270334661.269 +-1405921570 -1216781788 xsn hcn kvq 1943-03-29 02:59:25.891200000 1987-12-12 13:07:09.551855000 
628818220687074631493290070237453.087 +-1325311060 215201660 idx cwr wrd 1965-03-07 11:09:34.380183000 1987-10-08 09:20:44.116985000 4907336117121854426928106222274944.599 +120041936 1646988502 ykf mhc kfq 1963-11-29 15:02:47.360539000 1939-03-04 17:50:10.430653000 -17807686412254980337882265731310527.164 +1457455498 1646595280 toa doj wrm 1991-12-23 21:31:07.925428000 1938-12-29 14:03:44.995783000 14812442710274035367948155689373976.013 +1538066008 -1216519640 eyk xse sey 1977-05-21 05:42:51.807564000 1963-05-31 09:22:00.597388000 19090960606708815163382971841411467.525 +-1311679364 215463808 lgb ido grm 1939-07-25 21:15:05.932882000 1987-11-29 12:21:52.464881000 17659162742008704948910483791917025.263 +25930804 215070586 pbv pkf \N 1967-08-15 21:32:24.405831000 1987-09-25 08:35:27.030011000 -13528843992314706399562480312271212.42 +106541314 1646857428 alg tfa sny 1953-01-11 05:44:08.287967000 1939-01-24 15:34:19.169731000 -9250326095879926604127664160233720.908 +1551697704 -1216257492 hcn epk fau 1988-04-20 15:46:47.967111000 1963-07-22 12:23:08.945284000 10575139299037011718904436446568034.973 +-1405790496 -1216650714 cwr lgr sni 1943-04-11 03:44:42.978174000 1963-05-18 08:36:43.510414000 670357229390324920769356246470826.791 +-1325179986 215332734 mhc gbv oau 1965-03-20 11:54:51.467156000 1987-10-21 10:06:01.203959000 4948875125825104716204172398508318.303 +120173010 214677364 doj qlg bvh 1963-12-12 15:48:04.447513000 1939-03-17 18:35:27.517627000 -17766147403551730048606199555077153.46 +1457586572 1646726354 xse hsn oju 1992-01-05 22:16:25.012402000 1939-01-11 14:49:02.082757000 14853981718977285657224221865607349.717 +1538197082 -1216388566 ido cwi bvq 1977-06-03 06:28:08.894538000 1963-06-13 10:07:17.684362000 19132499615412065452659038017644841.229 +-1311548290 -1217043936 pkf mhs gbv 1939-08-07 22:00:23.019856000 1987-12-12 13:07:09.551855000 17700701750711955238186549968150398.967 +26061878 215201660 tfa toj kvq 1967-08-28 22:17:41.492805000 1987-10-08 09:20:44.116985000 -13487304983611456110286414136037838.716 +106672388 1646988502 epk xje wrd 1953-01-24 06:29:25.374941000 1939-02-06 16:19:36.256705000 -9208787087176676314851597984000347.204 +1551828778 1646333132 lgr ito cwi 1988-05-03 16:32:05.054085000 1938-12-29 14:03:44.995783000 10616678307740262008180502622801408.677 +-1405659422 -1216519640 gbv pkv wrm 1943-04-24 04:30:00.065147000 1963-05-31 09:22:00.597388000 711896238093575210045422422704200.495 +-1325048912 215463808 qlg kfa sey 1965-04-02 12:40:08.554130000 1987-11-03 10:51:18.290933000 4990414134528355005480238574741692.007 +120304084 214808438 hsn upk xje 1963-12-25 16:33:21.534487000 1987-09-25 08:35:27.030011000 -17724608394848479759330133378843779.756 +1457717646 1646857428 cwi lwr sny 1992-01-18 23:01:42.099376000 1939-01-24 15:34:19.169731000 14895520727680535946500288041840723.421 +1538328156 -1216257492 mhs gbm fau 1977-06-16 07:13:25.981512000 1963-06-26 10:52:34.771336000 19174038624115315741935104193878214.933 +-1311417216 -1216912862 toj qlw kfa 1939-08-20 22:45:40.106830000 1963-05-18 08:36:43.510414000 17742240759415205527462616144383772.671 +26192952 215332734 xje xsn oau 1967-09-10 23:02:58.579779000 1987-10-21 10:06:01.203959000 -13445765974908205821010347959804465.012 +106803462 214677364 ito cni bvh 1953-02-06 07:14:42.461915000 1939-02-19 17:04:53.343679000 -9167248078473426025575531807766973.5 +1551959852 1646464206 pkv mxs gbm 1988-05-16 17:17:22.141059000 1939-01-11 14:49:02.082757000 10658217316443512297456568799034782.381 +-1405528348 
-1216388566 kfa toa bvq 1943-05-07 05:15:17.152121000 1963-06-13 10:07:17.684362000 753435246796825499321488598937574.199 +-1324917838 -1217043936 upk oje gbv 1965-04-15 13:25:25.641104000 1987-11-16 11:36:35.377907000 5031953143231605294756304750975065.711 +120435158 214939512 lwr yto cni 1964-01-07 17:18:38.621461000 1987-10-08 09:20:44.116985000 -17683069386145229470054067202610406.052 +1457848720 1646988502 gbm pbv wrd 1992-01-31 23:46:59.186350000 1939-02-06 16:19:36.256705000 14937059736383786235776354218074097.125 +1538459230 1646333132 qlw kfq cwi 1977-06-29 07:58:43.068486000 1963-07-09 11:37:51.858310000 19215577632818566031211170370111588.637 +-1311286142 -1216781788 xsn upb oje 1939-09-02 23:30:57.193804000 1963-05-31 09:22:00.597388000 17783779768118455816738682320617146.375 +26324026 215463808 cni cwr sey 1967-09-23 23:48:15.666753000 1987-11-03 10:51:18.290933000 -13404226966204955531734281783571091.308 +106934536 214808438 mxs grm xje 1953-02-19 07:59:59.548889000 1939-03-04 17:50:10.430653000 -9125709069770175736299465631533599.796 +1552090926 1646595280 toa qcw kfq 1988-05-29 18:02:39.228033000 1939-01-24 15:34:19.169731000 10699756325146762586732634975268156.085 +-1405397274 -1216257492 oje xse fau 1943-05-20 06:00:34.239095000 1963-06-26 10:52:34.771336000 794974255500075788597554775170947.903 +-1324786764 -1216912862 yto sni kfa 1965-04-28 14:10:42.728078000 1987-11-29 12:21:52.464881000 5073492151934855584032370927208439.415 +120566232 215070586 pbv dxs grm 1964-01-20 18:03:55.708435000 1987-10-21 10:06:01.203959000 -17641530377441979180778001026377032.348 +1457979794 214677364 kfq tfa bvh 1992-02-14 00:32:16.273324000 1939-02-19 17:04:53.343679000 14978598745087036525052420394307470.829 +1538590304 1646464206 upb oju gbm 1977-07-12 08:44:00.155460000 1963-07-22 12:23:08.945284000 19257116641521816320487236546344962.341 +-1311155068 -1216650714 cwr ytf sni 1939-09-16 00:16:14.280778000 1963-06-13 10:07:17.684362000 17825318776821706106014748496850520.079 +26455100 -1217043936 grm gbv gbv 1967-10-07 00:33:32.753727000 1987-11-16 11:36:35.377907000 -13362687957501705242458215607337717.604 +107065610 214939512 qcw kvq cni 1953-03-04 08:45:16.635863000 1939-03-17 18:35:27.517627000 -9084170061066925447023399455300226.092 +1552222000 1646726354 xse ugb oju 1988-06-11 18:47:56.315006000 1939-02-06 16:19:36.256705000 10741295333850012876008701151501529.789 +-1405266200 1646333132 sni cwi cwi 1943-06-02 06:45:51.326069000 1963-07-09 11:37:51.858310000 836513264203326077873620951404321.607 +-1324655690 \N dxs wrm oje 1965-05-11 14:55:59.815052000 1987-12-12 13:07:09.551855000 5115031160638105873308437103441813.119 +120697306 -1216781788 tfa hcw kvq 1964-02-02 18:49:12.795409000 1987-11-03 10:51:18.290933000 -17599991368738728891501934850143658.644 +1458110868 215201660 oju xje xje 1992-02-27 01:17:33.360298000 1939-03-04 17:50:10.430653000 15020137753790286814328486570540844.533 +1538721378 214808438 ytf sny kfq 1977-07-25 09:29:17.242434000 1938-12-29 14:03:44.995783000 19298655650225066609763302722578336.045 +-1311023994 1646595280 gbv dxj wrm 1939-09-29 01:01:31.367752000 1963-06-26 10:52:34.771336000 17866857785524956395290814673083893.783 +26586174 -1216519640 kvq kfa kfa 1967-10-20 01:18:49.840701000 1987-11-29 12:21:52.464881000 -13321148948798454953182149431104343.9 +107196684 -1216912862 ugb oau grm 1953-03-17 09:30:33.722837000 1987-09-25 08:35:27.030011000 -9042631052363675157747333279066852.388 +1552353074 215070586 cwi ykf sny 1988-06-24 19:33:13.401980000 
1939-02-19 17:04:53.343679000 10782834342553263165284767327734903.493 +-1405135126 1646857428 wrm gbm gbm 1943-06-15 07:31:08.413043000 1963-07-22 12:23:08.945284000 878052272906576367149687127637695.311 +-1324524616 1646464206 hcw bvq sni 1965-05-24 15:41:16.902026000 1963-05-18 08:36:43.510414000 5156570169341356162584503279675186.823 +120828380 -1216650714 xje lgb oau 1964-02-15 19:34:29.882383000 1987-11-16 11:36:35.377907000 -17558452360035478602225868673910284.94 +1458241942 215332734 sny cni cni 1992-03-11 02:02:50.447272000 1939-03-17 18:35:27.517627000 15061676762493537103604552746774218.237 +1538852452 214939512 dxj wrd oju 1977-08-07 10:14:34.329408000 1939-01-11 14:49:02.082757000 19340194658928316899039368898811709.749 +-1310892920 1646726354 kfa hcn bvq 1939-10-12 01:46:48.454726000 1963-07-09 11:37:51.858310000 17908396794228206684566880849317267.487 +26717248 -1216388566 oau oje oje 1967-11-02 02:04:06.927675000 1987-12-12 13:07:09.551855000 -13279609940095204663906083254870970.196 +107327758 -1216781788 ykf sey kvq 1953-03-30 10:15:50.809811000 1987-10-08 09:20:44.116985000 -9001092043660424868471267102833478.684 +1552484148 215201660 gbm doj wrd 1988-07-07 20:18:30.488954000 1939-03-04 17:50:10.430653000 10824373351256513454560833503968277.197 +-1405004052 1646988502 bvq kfq kfq 1943-06-28 08:16:25.500017000 1938-12-29 14:03:44.995783000 919591281609826656425753303871069.015 +-1324393542 1646595280 lgb fau wrm 1965-06-06 16:26:33.989000000 1963-05-31 09:22:00.597388000 5198109178044606451860569455908560.527 +120959454 -1216519640 cni pkf sey 1964-02-28 20:19:46.969357000 1987-11-29 12:21:52.464881000 -17516913351332228312949802497676911.236 +1458373016 215463808 wrd grm grm 1992-03-24 02:48:07.534246000 1987-09-25 08:35:27.030011000 15103215771196787392880618923007591.941 +1538983526 215070586 hcn bvh sny 1977-08-20 10:59:51.416382000 1939-01-24 15:34:19.169731000 19381733667631567188315435075045083.453 +-1310761846 1646857428 oje lgr fau 1939-10-25 02:32:05.541700000 1963-07-22 12:23:08.945284000 17949935802931456973842947025550641.191 +26848322 -1216257492 sey sni sni 1967-11-15 02:49:24.014649000 1963-05-18 08:36:43.510414000 -13238070931391954374630017078637596.492 +107458832 -1216650714 doj wid oau 1953-04-12 11:01:07.896785000 1987-10-21 10:06:01.203959000 -8959553034957174579195200926600104.98 +1552615222 215332734 kfq hsn bvh 1988-07-20 21:03:47.575928000 1939-03-17 18:35:27.517627000 10865912359959763743836899680201650.901 +-1404872978 214677364 fau oju oju 1943-07-11 09:01:42.586991000 1939-01-11 14:49:02.082757000 961130290313076945701819480104442.719 +-1324262468 1646726354 pkf jey bvq 1965-06-19 17:11:51.075974000 1963-06-13 10:07:17.684362000 5239648186747856741136635632141934.231 +121090528 -1216388566 grm toj gbv 1964-03-12 21:05:04.056331000 1987-12-12 13:07:09.551855000 -17475374342628978023673736321443537.532 +1458504090 -1217043936 bvh kvq kvq 1992-04-06 03:33:24.621220000 1987-10-08 09:20:44.116985000 15144754779900037682156685099240965.645 +1539114600 215201660 lgr fal wrd 1977-09-02 11:45:08.503356000 1939-02-06 16:19:36.256705000 19423272676334817477591501251278457.157 +-1310630772 1646988502 sni pkv cwi 1939-11-07 03:17:22.628674000 1938-12-29 14:03:44.995783000 17991474811634707263119013201784014.895 +26979396 1646333132 wid wrm wrm 1967-11-28 03:34:41.101623000 1963-05-31 09:22:00.597388000 -13196531922688704085353950902404222.788 +107589906 -1216519640 hsn bmh sey 1953-04-25 11:46:24.983759000 1987-11-03 10:51:18.290933000 
-8918014026253924289919134750366731.276 +1552746296 215463808 oju lwr xje 1988-08-02 21:49:04.662902000 1987-09-25 08:35:27.030011000 10907451368663014033112965856435024.605 +-1404741904 214808438 jey sny sny 1943-07-24 09:46:59.673965000 1939-01-24 15:34:19.169731000 1002669299016327234977885656337816.423 +-1324131394 1646857428 toj nid fau 1965-07-02 17:57:08.162948000 1963-06-26 10:52:34.771336000 5281187195451107030412701808375307.935 +121221602 -1216257492 kvq xsn kfa 1964-03-25 21:50:21.143305000 1963-05-18 08:36:43.510414000 -17433835333925727734397670145210163.828 +1458635164 -1216912862 fal oau oau 1992-04-19 04:18:41.708194000 1987-10-21 10:06:01.203959000 15186293788603287971432751275474339.349 +1539245674 215332734 pkv jep bvh 1977-09-15 12:30:25.590330000 1939-02-19 17:04:53.343679000 19464811685038067766867567427511830.861 +-1310499698 214677364 wrm toa gbm 1939-11-20 04:02:39.715648000 1939-01-11 14:49:02.082757000 18033013820337957552395079378017388.599 +27110470 1646464206 bmh bvq bvq 1967-12-11 04:19:58.188597000 1963-06-13 10:07:17.684362000 -13154992913985453796077884726170849.084 +107720980 -1216388566 lwr fql gbv 1953-05-08 12:31:42.070732000 1987-11-16 11:36:35.377907000 -8876475017550674000643068574133357.572 +1552877370 -1217043936 sny pbv cni 1988-08-15 22:34:21.749876000 1987-10-08 09:20:44.116985000 10948990377366264322389032032668398.309 +-1404610830 214939512 nid wrd wrd 1943-08-06 10:32:16.760939000 1939-02-06 16:19:36.256705000 1044208307719577524253951832571190.127 +-1324000320 1646988502 xsn rmh cwi 1965-07-15 18:42:25.249922000 1963-07-09 11:37:51.858310000 5322726204154357319688767984608681.639 +121352676 1646333132 oau cwr oje 1964-04-07 22:35:38.230279000 1963-05-31 09:22:00.597388000 -17392296325222477445121603968976790.124 +1458766238 -1216781788 jep sey sey 1992-05-02 05:03:58.795168000 1987-11-03 10:51:18.290933000 15227832797306538260708817451707713.053 +1539376748 215463808 toa nit xje 1977-09-28 13:15:42.677304000 1939-03-04 17:50:10.430653000 19506350693741318056143633603745204.565 +-1310368624 214808438 bvq xse kfq 1939-12-03 04:47:56.802622000 1939-01-24 15:34:19.169731000 18074552829041207841671145554250762.303 +27241544 1646595280 fql fau fau 1967-12-24 05:05:15.275570000 1963-06-26 10:52:34.771336000 -13113453905282203506801818549937475.38 +107852054 -1216257492 pbv jup kfa 1953-05-21 13:16:59.157706000 1987-11-29 12:21:52.464881000 -8834936008847423711367002397899983.868 +1553008444 -1216912862 wrd tfa grm 1988-08-28 23:19:38.836850000 1987-10-21 10:06:01.203959000 10990529386069514611665098208901772.013 +-1404479756 215070586 rmh bvh bvh 1943-08-19 11:17:33.847913000 1939-02-19 17:04:53.343679000 1085747316422827813530018008804563.831 +-1323869246 214677364 cwr vql gbm 1965-07-28 19:27:42.336896000 1963-07-22 12:23:08.945284000 5364265212857607608964834160842055.343 +121483750 1646464206 sey gbv sni 1964-04-20 23:20:55.317253000 1963-06-13 10:07:17.684362000 -17350757316519227155845537792743416.42 +1458897312 -1216650714 nit wid gbv 1992-05-15 05:49:15.882142000 1987-11-16 11:36:35.377907000 15269371806009788549984883627941086.757 +1539507822 -1217043936 xse rmx cni 1977-10-11 14:00:59.764278000 1939-03-17 18:35:27.517627000 19547889702444568345419699779978578.269 +-1310237550 214939512 fau cwi oju 1939-12-16 05:33:13.889596000 1939-02-06 16:19:36.256705000 18116091837744458130947211730484136.007 +27372618 1646726354 jup jey cwi 1968-01-06 05:50:32.362544000 1963-07-09 11:37:51.858310000 -13071914896578953217525752373704101.676 
+107983128 1646333132 tfa nyt oje 1953-06-03 14:02:16.244680000 1987-12-12 13:07:09.551855000 -8793397000144173422090936221666610.164 +1553139518 -1216781788 bvh xje kvq 1988-09-11 00:04:55.923824000 1987-11-03 10:51:18.290933000 11032068394772764900941164385135145.717 +-1404348682 215201660 vql fal xje 1943-09-01 12:02:50.934887000 1939-03-04 17:50:10.430653000 1127286325126078102806084185037937.535 +-1323738172 214808438 gbv aup kfq 1965-08-10 20:12:59.423870000 1938-12-29 14:03:44.995783000 5405804221560857898240900337075429.047 +121614824 1646595280 wid kfa wrm 1964-05-04 00:06:12.404227000 1963-06-26 10:52:34.771336000 -17309218307815976866569471616510042.716 +1459028386 -1216519640 rmx bmh kfa 1992-05-28 06:34:32.969116000 1987-11-29 12:21:52.464881000 15310910814713038839260949804174460.461 +1539638896 -1216912862 cwi vqc grm 1977-10-24 14:46:16.851252000 1987-09-25 08:35:27.030011000 19589428711147818634695765956211951.973 +-1310106476 215070586 jey gbm sny 1939-12-29 06:18:30.976570000 1939-02-19 17:04:53.343679000 18157630846447708420223277906717509.711 +27503692 1646857428 nyt nid gbm 1968-01-19 06:35:49.449518000 1963-07-22 12:23:08.945284000 -13030375887875702928249686197470727.972 +108114202 1646464206 xje rdx sni 1953-06-16 14:47:33.331654000 1963-05-18 08:36:43.510414000 -8751857991440923132814870045433236.46 +1553270592 -1216650714 fal cni oau 1988-09-24 00:50:13.010798000 1987-11-16 11:36:35.377907000 11073607403476015190217230561368519.421 +-1404217608 215332734 aup jep cni 1943-09-14 12:48:08.021861000 1939-03-17 18:35:27.517627000 1168825333829328392082150361271311.239 +-1323607098 214939512 kfa eyt oju 1965-08-23 20:58:16.510844000 1939-01-11 14:49:02.082757000 5447343230264108187516966513308802.751 +121745898 1646726354 bmh oje bvq 1964-05-17 00:51:29.491201000 1963-07-09 11:37:51.858310000 -17267679299112726577293405440276669.012 +1459159460 -1216388566 vqc fql oje 1992-06-10 07:19:50.056090000 1987-12-12 13:07:09.551855000 15352449823416289128537015980407834.165 +1539769970 -1216781788 gbm aug kvq 1977-11-06 15:31:33.938226000 1987-10-08 09:20:44.116985000 19630967719851068923971832132445325.677 +-1309975402 215201660 nid kfq wrd 1940-01-11 07:03:48.063544000 1939-03-04 17:50:10.430653000 18199169855150958709499344082950883.415 +27634766 1646988502 rdx rmh kfq 1968-02-01 07:21:06.536492000 1938-12-29 14:03:44.995783000 -12988836879172452638973620021237354.268 +108245276 1646595280 cni vhc wrm 1953-06-29 15:32:50.418628000 1963-05-31 09:22:00.597388000 -8710318982737672843538803869199862.756 +1553401666 -1216519640 jep grm sey 1988-10-07 01:35:30.097772000 1987-11-29 12:21:52.464881000 11115146412179265479493296737601893.125 +-1404086534 215463808 eyt nit grm 1943-09-27 13:33:25.108835000 1987-09-25 08:35:27.030011000 1210364342532578681358216537504684.943 +-1323476024 215070586 oje idx sny 1965-09-05 21:43:33.597818000 1939-01-24 15:34:19.169731000 -14457305820759193856084943006068793.4 +121876972 1646857428 fql sni fau 1964-05-30 01:36:46.578175000 1963-07-22 12:23:08.945284000 -17226140290409476288017339264043295.308 +1459290534 -1216257492 aug jup sni 1992-06-23 08:05:07.143064000 1963-05-18 08:36:43.510414000 15393988832119539417813082156641207.869 +1539901044 -1216650714 kfq eyk oau 1977-11-19 16:16:51.025199000 1987-10-21 10:06:01.203959000 -263296658230627690673753078742955.402 +-1309844328 215332734 rmh oju bvh 1940-01-24 07:49:05.150517000 1939-03-17 18:35:27.517627000 18240708863854208998775410259184257.119 +27765840 214677364 vhc vql oju 1968-02-14 
08:06:23.623466000 1939-01-11 14:49:02.082757000 -12947297870469202349697553845003980.564 +108376350 1646726354 grm alg bvq 1953-07-12 16:18:07.505602000 1963-06-13 10:07:17.684362000 13935904563474403629658083479999083.81 +1553532740 -1216388566 nit kvq gbv \N 1987-12-12 13:07:09.551855000 11156685420882515768769362913835266.829 +-1403955460 -1217043936 idx rmx kvq 1988-10-20 02:20:47.184746000 1987-10-08 09:20:44.116985000 1251903351235828970634282713738058.647 +-1323344950 215201660 sni mhc wrd 1943-10-10 14:18:42.195809000 1939-02-06 16:19:36.256705000 -14415766812055943566808876829835419.696 +122008046 1646988502 jup wrm cwi 1965-09-18 22:28:50.684792000 1938-12-29 14:03:44.995783000 -17184601281706225998741273087809921.604 +1459421608 1646333132 eyk nyt wrm 1964-06-12 02:22:03.665149000 1963-05-31 09:22:00.597388000 15435527840822789707089148332874581.573 +1540032118 -1216519640 oju ido sey 1992-07-06 08:50:24.230037000 1987-11-03 10:51:18.290933000 -221757649527377401397686902509581.698 +-1309713254 215463808 vql sny xje 1977-12-02 17:02:08.112173000 1987-09-25 08:35:27.030011000 18282247872557459288051476435417630.823 +27896914 214808438 alg aup sny 1940-02-06 08:34:22.237491000 1939-01-24 15:34:19.169731000 -12905758861765952060421487668770606.86 +108507424 1646857428 kvq epk fau 1968-02-27 08:51:40.710440000 1963-06-26 10:52:34.771336000 13977443572177653918934149656232457.514 +1553663814 -1216257492 rmx oau kfa 1953-07-25 17:03:24.592576000 1963-05-18 08:36:43.510414000 11198224429585766058045429090068640.533 +-1403824386 -1216912862 mhc vqc oau 1988-11-02 03:06:04.271720000 1987-10-21 10:06:01.203959000 1293442359939079259910348889971432.351 +-1323213876 215332734 wrm qlg bvh 1943-10-23 15:03:59.282783000 1939-02-19 17:04:53.343679000 -14374227803352693277532810653602045.992 +122139120 214677364 nyt bvq gbm 1965-10-01 23:14:07.771766000 1939-01-11 14:49:02.082757000 -17143062273002975709465206911576547.9 +1459552682 1646464206 ido rdx bvq 1964-06-25 03:07:20.752123000 1963-06-13 10:07:17.684362000 15477066849526039996365214509107955.277 +1540163192 -1216388566 sny mhs gbv 1992-07-19 09:35:41.317011000 1987-11-16 11:36:35.377907000 -180218640824127112121620726276207.994 +-1309582180 -1217043936 aup wrd cni 1977-12-15 17:47:25.199147000 1987-10-08 09:20:44.116985000 18323786881260709577327542611651004.527 +28027988 214939512 epk eyt wrd 1940-02-19 09:19:39.324465000 1939-02-06 16:19:36.256705000 -12864219853062701771145421492537233.156 +108638498 1646988502 oau ito cwi 1968-03-11 09:36:57.797414000 1963-07-09 11:37:51.858310000 14018982580880904208210215832465831.218 +1553794888 1646333132 vqc sey oje 1953-08-07 17:48:41.679550000 1963-05-31 09:22:00.597388000 11239763438289016347321495266302014.237 +-1403693312 -1216781788 qlg aug sey 1988-11-15 03:51:21.358694000 1987-11-03 10:51:18.290933000 1334981368642329549186415066204806.055 +-1323082802 215463808 bvq upk xje 1943-11-05 15:49:16.369757000 1939-03-04 17:50:10.430653000 -14332688794649442988256744477368672.288 +\N 214808438 rdx fau kfq 1965-10-14 23:59:24.858740000 1939-01-24 15:34:19.169731000 -17101523264299725420189140735343174.196 +122270194 1646595280 mhs vhc fau 1964-07-08 03:52:37.839097000 1963-06-26 10:52:34.771336000 15518605858229290285641280685341328.981 +1459683756 -1216257492 wrd qlw kfa 1992-08-01 10:20:58.403985000 1987-11-29 12:21:52.464881000 -138679632120876822845554550042834.29 +1540294266 -1216912862 eyt bvh grm 1977-12-28 18:32:42.286121000 1987-10-21 10:06:01.203959000 
18365325889963959866603608787884378.231 +-1309451106 215070586 ito idx bvh 1940-03-03 10:04:56.411439000 1939-02-19 17:04:53.343679000 -12822680844359451481869355316303859.452 +28159062 214677364 sey mxs gbm 1968-03-24 10:22:14.884388000 1963-07-22 12:23:08.945284000 14060521589584154497486282008699204.922 +108769572 1646464206 aug wid sni 1953-08-20 18:33:58.766524000 1963-06-13 10:07:17.684362000 11281302446992266636597561442535387.941 +1553925962 -1216650714 upk eyk gbv 1988-11-28 04:36:38.445668000 1987-11-16 11:36:35.377907000 1376520377345579838462481242438179.759 +-1403562238 -1217043936 fau yto cni 1943-11-18 16:34:33.456731000 1939-03-17 18:35:27.517627000 -14291149785946192698980678301135298.584 +-1322951728 214939512 vhc jey oju 1965-10-28 00:44:41.945714000 1939-02-06 16:19:36.256705000 -17059984255596475130913074559109800.492 +122401268 1646726354 qlw alg cwi 1964-07-21 04:37:54.926071000 1963-07-09 11:37:51.858310000 15560144866932540574917346861574702.685 +1459814830 1646333132 bvh upb oje 1992-08-14 11:06:15.490959000 1987-12-12 13:07:09.551855000 -97140623417626533569488373809460.586 +1540425340 -1216781788 idx fal kvq 1978-01-10 19:17:59.373095000 1987-11-03 10:51:18.290933000 18406864898667210155879674964117751.935 +-1309320032 215201660 mxs mhc xje 1940-03-16 10:50:13.498413000 1939-03-04 17:50:10.430653000 -12781141835656201192593289140070485.748 +28290136 214808438 wid qcw kfq 1968-04-06 11:07:31.971362000 1938-12-29 14:03:44.995783000 14102060598287404786762348184932578.626 +108900646 1646595280 eyk bmh wrm 1953-09-02 19:19:15.853498000 1963-06-26 10:52:34.771336000 11322841455695516925873627618768761.645 +1554057036 -1216519640 yto ido kfa 1988-12-11 05:21:55.532642000 1987-11-29 12:21:52.464881000 1418059386048830127738547418671553.463 +-1403431164 -1216912862 jey dxs grm 1943-12-01 17:19:50.543705000 1987-09-25 08:35:27.030011000 -14249610777242942409704612124901924.88 +-1322820654 215070586 alg nid sny 1965-11-10 01:29:59.032688000 1939-02-19 17:04:53.343679000 -17018445246893224841637008382876426.788 +122532342 1646857428 upb epk gbm 1964-08-03 05:23:12.013045000 1963-07-22 12:23:08.945284000 15601683875635790864193413037808076.389 +1459945904 1646464206 fal ytf sni 1992-08-27 11:51:32.577933000 1963-05-18 08:36:43.510414000 -55601614714376244293422197576086.882 +1540556414 -1216650714 mhc jep oau 1978-01-23 20:03:16.460069000 1987-11-16 11:36:35.377907000 18448403907370460445155741140351125.639 +-1309188958 215332734 qcw qlg cni 1940-03-29 11:35:30.585387000 1939-03-17 18:35:27.517627000 -12739602826952950903317222963837112.044 +28421210 214939512 bmh ugb oju 1968-04-19 11:52:49.058336000 1939-01-11 14:49:02.082757000 14143599606990655076038414361165952.33 +109031720 1646726354 ido fql bvq 1953-09-15 20:04:32.940472000 1963-07-09 11:37:51.858310000 11364380464398767215149693795002135.349 +1554188110 -1216388566 dxs mhs oje 1988-12-24 06:07:12.619616000 1987-12-12 13:07:09.551855000 1459598394752080417014613594904927.167 +-1403300090 -1216781788 nid hcw kvq 1943-12-14 18:05:07.630679000 1987-10-08 09:20:44.116985000 \N +-1322689580 215201660 epk rmh wrd 1965-11-23 02:15:16.119662000 1939-03-04 17:50:10.430653000 -14208071768539692120428545948668551.176 +122663416 1646988502 ytf ito kfq 1964-08-16 06:08:29.100019000 1938-12-29 14:03:44.995783000 -16976906238189974552360942206643053.084 +1460076978 1646595280 jep dxj wrm 1992-09-09 12:36:49.664907000 1963-05-31 09:22:00.597388000 15643222884339041153469479214041450.093 +1540687488 -1216519640 qlg nit sey 
1978-02-05 20:48:33.547043000 1987-11-29 12:21:52.464881000 -14062606011125955017356021342713.178 +-1309057884 215463808 ugb upk grm 1940-04-11 12:20:47.672361000 1987-09-25 08:35:27.030011000 18489942916073710734431807316584499.343 +28552284 215070586 fql ykf sny 1968-05-02 12:38:06.145310000 1939-01-24 15:34:19.169731000 -12698063818249700614041156787603738.34 +109162794 1646857428 mhs jup fau 1953-09-28 20:49:50.027446000 1963-07-22 12:23:08.945284000 14185138615693905365314480537399326.034 +1554319184 -1216257492 hcw qlw sni 1989-01-06 06:52:29.706590000 1963-05-18 08:36:43.510414000 11405919473102017504425759971235509.053 +-1403169016 -1216650714 rmh lgb oau 1943-12-27 18:50:24.717653000 1987-10-21 10:06:01.203959000 1501137403455330706290679771138300.871 +-1322558506 215332734 ito vql bvh 1965-12-06 03:00:33.206636000 1939-03-17 18:35:27.517627000 -14166532759836441831152479772435177.472 +122794490 214677364 dxj mxs oju 1964-08-29 06:53:46.186993000 1939-01-11 14:49:02.082757000 -16935367229486724263084876030409679.38 +1460208052 1646726354 nit hcn bvq 1992-09-22 13:22:06.751881000 1963-06-13 10:07:17.684362000 15684761893042291442745545390274823.797 +1540818562 -1216388566 upk rmx gbv 1978-02-18 21:33:50.634017000 1987-12-12 13:07:09.551855000 -21240171529866529632202202809594852.69 +-1308926810 -1217043936 ykf yto kvq 1940-04-24 13:06:04.759335000 1987-10-08 09:20:44.116985000 18531481924776961023707873492817873.047 +28683358 215201660 jup doj wrd 1968-05-15 13:23:23.232284000 1939-02-06 16:19:36.256705000 -12656524809546450324765090611370364.636 +109293868 1646988502 qlw nyt cwi 1953-10-11 21:35:07.114420000 1938-12-29 14:03:44.995783000 14226677624397155654590546713632699.738 +1554450258 1646333132 lgb upb wrm 1989-01-19 07:37:46.793564000 1963-05-31 09:22:00.597388000 11447458481805267793701826147468882.757 +-1403037942 -1216519640 vql pkf sey 1944-01-09 19:35:41.804627000 1987-11-03 10:51:18.290933000 1542676412158580995566745947371674.575 +-1322427432 215463808 mxs aup xje 1965-12-19 03:45:50.293610000 1987-09-25 08:35:27.030011000 -14124993751133191541876413596201803.768 +122925564 214808438 hcn qcw sny 1964-09-11 07:39:03.273967000 1939-01-24 15:34:19.169731000 -16893828220783473973808809854176305.676 +1460339126 1646857428 rmx lgr fau 1992-10-05 14:07:23.838855000 1963-06-26 10:52:34.771336000 15726300901745541732021611566508197.501 +1540949636 -1216257492 yto vqc kfa 1978-03-03 22:19:07.720991000 1963-05-18 08:36:43.510414000 -21198632521163279342926136633361478.986 +-1308795736 -1216912862 doj dxs oau 1940-05-07 13:51:21.846309000 1987-10-21 10:06:01.203959000 18573020933480211312983939669051246.751 +28814432 215332734 nyt hsn bvh 1968-05-28 14:08:40.319258000 1939-02-19 17:04:53.343679000 -12614985800843200035489024435136990.932 +109424942 214677364 upb rdx gbm 1953-10-24 22:20:24.201394000 1939-01-11 14:49:02.082757000 14268216633100405943866612889866073.442 +1554581332 1646464206 pkf ytf bvq 1989-02-01 08:23:03.880538000 1963-06-13 10:07:17.684362000 11488997490508518082977892323702256.461 +-1402906868 -1216388566 aup toj gbv 1944-01-22 20:20:58.891601000 1987-11-16 11:36:35.377907000 1584215420861831284842812123605048.279 +-1322296358 -1217043936 qcw eyt cni 1966-01-01 04:31:07.380583000 1987-10-08 09:20:44.116985000 -14083454742429941252600347419968430.064 +123056638 214939512 lgr ugb wrd 1964-09-24 08:24:20.360940000 1939-02-06 16:19:36.256705000 -16852289212080223684532743677942931.972 +1460470200 1646988502 vqc pkv cwi 1992-10-18 14:52:40.925829000 1963-07-09 
11:37:51.858310000 15767839910448792021297677742741571.205 +1541080710 1646333132 dxs aug oje 1978-03-16 23:04:24.807965000 1963-05-31 09:22:00.597388000 -21157093512460029053650070457128105.282 +-1308664662 -1216781788 hsn hcw sey 1940-05-20 14:36:38.933283000 1987-11-03 10:51:18.290933000 18614559942183461602260005845284620.455 +28945506 215463808 rdx lwr xje 1968-06-10 14:53:57.406232000 1939-03-04 17:50:10.430653000 -12573446792139949746212958258903617.228 +109556016 214808438 ytf vhc kfq 1953-11-06 23:05:41.288368000 1939-01-24 15:34:19.169731000 14309755641803656233142679066099447.146 +1554712406 1646595280 toj dxj fau 1989-02-14 09:08:20.967512000 1963-06-26 10:52:34.771336000 11530536499211768372253958499935630.165 +-1402775794 -1216257492 eyt xsn kfa 1944-02-04 21:06:15.978575000 1987-11-29 12:21:52.464881000 1625754429565081574118878299838421.983 +-1322165284 -1216912862 ugb idx grm 1966-01-14 05:16:24.467557000 1987-10-21 10:06:01.203959000 -14041915733726690963324281243735056.36 +123187712 215070586 pkv ykf bvh 1964-10-07 09:09:37.447914000 1939-02-19 17:04:53.343679000 -16810750203376973395256677501709558.268 +1460601274 214677364 aug toa gbm 1992-10-31 15:37:58.012803000 1963-07-22 12:23:08.945284000 15809378919152042310573743918974944.909 +1541211784 1646464206 hcw eyk sni 1978-03-29 23:49:41.894939000 1963-06-13 10:07:17.684362000 -21115554503756778764374004280894731.578 +-1308533588 -1216650714 lwr lgb gbv 1940-06-02 15:21:56.020257000 1987-11-16 11:36:35.377907000 18656098950886711891536072021517994.159 +29076580 -1217043936 vhc pbv cni 1968-06-23 15:39:14.493206000 1939-03-17 18:35:27.517627000 -12531907783436699456936892082670243.524 +109687090 214939512 dxj alg oju 1953-11-19 23:50:58.375342000 1939-02-06 16:19:36.256705000 14351294650506906522418745242332820.85 +1554843480 1646726354 xsn hcn cwi 1989-02-27 09:53:38.054486000 1963-07-09 11:37:51.858310000 11572075507915018661530024676169003.869 +-1402644720 1646333132 idx cwr oje 1944-02-17 21:51:33.065548000 1987-12-12 13:07:09.551855000 1667293438268331863394944476071795.687 +-1322034210 -1216781788 ykf mhc kvq 1966-01-27 06:01:41.554531000 1987-11-03 10:51:18.290933000 -14000376725023440674048215067501682.656 +123318786 215201660 toa doj xje 1964-10-20 09:54:54.534888000 1939-03-04 17:50:10.430653000 -16769211194673723105980611325476184.564 +1460732348 214808438 eyk xse kfq 1992-11-13 16:23:15.099777000 1938-12-29 14:03:44.995783000 15850917927855292599849810095208318.613 +1541342858 1646595280 lgb ido wrm 1978-04-12 00:34:58.981913000 1963-06-26 10:52:34.771336000 -21074015495053528475097938104661357.874 +-1308402514 -1216519640 pbv pkf kfa 1940-06-15 16:07:13.107231000 1987-11-29 12:21:52.464881000 18697637959589962180812138197751367.863 +29207654 -1216912862 alg tfa grm 1968-07-06 16:24:31.580180000 1987-09-25 08:35:27.030011000 -12490368774733449167660825906436869.82 +109818164 215070586 hcn epk sny 1953-12-03 00:36:15.462316000 1939-02-19 17:04:53.343679000 14392833659210156811694811418566194.554 +1554974554 1646857428 cwr lgr gbm 1989-03-12 10:38:55.141460000 1963-07-22 12:23:08.945284000 11613614516618268950806090852402377.573 +-1402513646 1646464206 mhc gbv sni 1944-03-01 22:36:50.152522000 1963-05-18 08:36:43.510414000 1708832446971582152671010652305169.391 +-1321903136 -1216650714 doj qlg oau 1966-02-09 06:46:58.641505000 1987-11-16 11:36:35.377907000 -13958837716320190384772148891268308.952 +123449860 215332734 xse hsn cni 1964-11-02 10:40:11.621862000 1939-03-17 18:35:27.517627000 
-16727672185970472816704545149242810.86 +1460863422 214939512 ido cwi oju 1992-11-26 17:08:32.186751000 1939-01-11 14:49:02.082757000 15892456936558542889125876271441692.317 +1541473932 1646726354 pkf mhs bvq 1978-04-25 01:20:16.068887000 1963-07-09 11:37:51.858310000 -21032476486350278185821871928427984.17 +-1308271440 -1216388566 tfa toj oje 1940-06-28 16:52:30.194205000 1987-12-12 13:07:09.551855000 18739176968293212470088204373984741.567 +29338728 -1216781788 epk xje kvq 1968-07-19 17:09:48.667154000 1987-10-08 09:20:44.116985000 -12448829766030198878384759730203496.116 +109949238 215201660 lgr ito wrd 1953-12-16 01:21:32.549290000 1939-03-04 17:50:10.430653000 14434372667913407100970877594799568.258 +1555105628 1646988502 gbv pkv kfq 1989-03-25 11:24:12.228434000 1938-12-29 14:03:44.995783000 11655153525321519240082157028635751.277 +-1402382572 1646595280 qlg kfa wrm 1944-03-14 23:22:07.239496000 1963-05-31 09:22:00.597388000 1750371455674832441947076828538543.095 +-1321772062 \N hsn upk sey 1966-02-22 07:32:15.728479000 1987-11-29 12:21:52.464881000 -13917298707616940095496082715034935.248 +123580934 -1216519640 cwi lwr grm 1964-11-15 11:25:28.708836000 1987-09-25 08:35:27.030011000 -16686133177267222527428478973009437.156 +1460994496 215463808 mhs gbm sny 1992-12-09 17:53:49.273725000 1939-01-24 15:34:19.169731000 15933995945261793178401942447675066.021 +1541605006 215070586 toj qlw fau 1978-05-08 02:05:33.155861000 1963-07-22 12:23:08.945284000 -20990937477647027896545805752194610.466 +-1308140366 1646857428 xje xsn sni 1940-07-11 17:37:47.281179000 1963-05-18 08:36:43.510414000 18780715976996462759364270550218115.271 +29469802 -1216257492 ito cni oau 1968-08-01 17:55:05.754128000 1987-10-21 10:06:01.203959000 -12407290757326948589108693553970122.412 +110080312 -1216650714 pkv mxs bvh 1953-12-29 02:06:49.636264000 1939-03-17 18:35:27.517627000 14475911676616657390246943771032941.962 +1555236702 215332734 kfa toa oju 1989-04-07 12:09:29.315407000 1939-01-11 14:49:02.082757000 11696692534024769529358223204869124.981 +-1402251498 214677364 upk oje bvq 1944-03-28 00:07:24.326470000 1963-06-13 10:07:17.684362000 1791910464378082731223143004771916.799 +-1321640988 1646726354 lwr yto gbv 1966-03-07 08:17:32.815453000 1987-12-12 13:07:09.551855000 -13875759698913689806220016538801561.544 +123712008 -1216388566 gbm pbv kvq 1964-11-28 12:10:45.795810000 1987-10-08 09:20:44.116985000 -16644594168563972238152412796776063.452 +1461125570 -1217043936 qlw kfq wrd 1992-12-22 18:39:06.360699000 1939-02-06 16:19:36.256705000 15975534953965043467678008623908439.725 +1541736080 215201660 xsn upb cwi 1978-05-21 02:50:50.242835000 1938-12-29 14:03:44.995783000 -20949398468943777607269739575961236.762 +-1308009292 1646988502 cni cwr wrm 1940-07-24 18:23:04.368153000 1963-05-31 09:22:00.597388000 18822254985699713048640336726451488.975 +29600876 1646333132 mxs grm sey 1968-08-14 18:40:22.841102000 1987-11-03 10:51:18.290933000 -12365751748623698299832627377736748.708 +110211386 -1216519640 toa qcw xje 1954-01-11 02:52:06.723238000 1987-09-25 08:35:27.030011000 14517450685319907679523009947266315.666 +1555367776 215463808 oje xse sny 1989-04-20 12:54:46.402381000 1939-01-24 15:34:19.169731000 11738231542728019818634289381102498.685 +-1402120424 214808438 yto sni fau 1944-04-10 00:52:41.413444000 1963-06-26 10:52:34.771336000 1833449473081333020499209181005290.503 +-1321509914 1646857428 pbv dxs kfa 1966-03-20 09:02:49.902427000 1963-05-18 08:36:43.510414000 -13834220690210439516943950362568187.84 
+123843082 -1216257492 kfq tfa oau 1964-12-11 12:56:02.882784000 1987-10-21 10:06:01.203959000 -16603055159860721948876346620542689.748 +1461256644 -1216912862 upb oju bvh 1993-01-04 19:24:23.447673000 1939-02-19 17:04:53.343679000 16017073962668293756954074800141813.429 +1541867154 215332734 cwr ytf gbm 1978-06-03 03:36:07.329809000 1939-01-11 14:49:02.082757000 -20907859460240527317993673399727863.058 +-1307878218 214677364 grm gbv bvq 1940-08-06 19:08:21.455127000 1963-06-13 10:07:17.684362000 18863793994402963337916402902684862.679 +29731950 1646464206 qcw kvq gbv 1968-08-27 19:25:39.928076000 1987-11-16 11:36:35.377907000 -12324212739920448010556561201503375.004 +110342460 -1216388566 xse ugb cni 1954-01-24 03:37:23.810212000 1987-10-08 09:20:44.116985000 14558989694023157968799076123499689.37 +1555498850 -1217043936 sni cwi wrd 1989-05-03 13:40:03.489355000 1939-02-06 16:19:36.256705000 11779770551431270107910355557335872.389 +-1401989350 214939512 dxs wrm cwi 1944-04-23 01:37:58.500418000 1963-07-09 11:37:51.858310000 1874988481784583309775275357238664.207 +-1321378840 1646988502 tfa hcw oje 1966-04-02 09:48:06.989401000 1963-05-31 09:22:00.597388000 -13792681681507189227667884186334814.136 +123974156 1646333132 oju xje sey 1964-12-24 13:41:19.969758000 1987-11-03 10:51:18.290933000 -16561516151157471659600280444309316.044 +1461387718 -1216781788 ytf sny xje 1993-01-17 20:09:40.534647000 1939-03-04 17:50:10.430653000 16058612971371544046230140976375187.133 +\N 215463808 gbv dxj kfq 1978-06-16 04:21:24.416783000 1939-01-24 15:34:19.169731000 -20866320451537277028717607223494489.354 +1541998228 214808438 kvq kfa fau 1940-08-19 19:53:38.542101000 1963-06-26 10:52:34.771336000 18905333003106213627192469078918236.383 +-1307747144 1646595280 ugb oau kfa 1968-09-09 20:10:57.015050000 1987-11-29 12:21:52.464881000 -12282673731217197721280495025270001.3 +29863024 -1216257492 cwi ykf grm 1954-02-06 04:22:40.897186000 1987-10-21 10:06:01.203959000 14600528702726408258075142299733063.074 +110473534 -1216912862 wrm gbm bvh 1989-05-16 14:25:20.576329000 1939-02-19 17:04:53.343679000 11821309560134520397186421733569246.093 +1555629924 215070586 hcw bvq gbm 1944-05-06 02:23:15.587392000 1963-07-22 12:23:08.945284000 1916527490487833599051341533472037.911 +-1401858276 214677364 xje lgb sni 1966-04-15 10:33:24.076375000 1963-06-13 10:07:17.684362000 -13751142672803938938391818010101440.432 +-1321247766 1646464206 sny cni gbv 1965-01-06 14:26:37.056732000 1987-11-16 11:36:35.377907000 -16519977142454221370324214268075942.34 +124105230 -1216650714 dxj wrd cni 1993-01-30 20:54:57.621621000 1939-03-17 18:35:27.517627000 16100151980074794335506207152608560.837 +1461518792 -1217043936 kfa hcn oju 1978-06-29 05:06:41.503757000 1939-02-06 16:19:36.256705000 -20824781442834026739441541047261115.65 +1542129302 214939512 oau oje cwi 1940-09-01 20:38:55.629075000 1963-07-09 11:37:51.858310000 18946872011809463916468535255151610.087 +-1307616070 1646726354 ykf sey oje 1968-09-22 20:56:14.102024000 1987-12-12 13:07:09.551855000 -12241134722513947432004428849036627.596 +29994098 1646333132 gbm doj kvq 1954-02-19 05:07:57.984160000 1987-11-03 10:51:18.290933000 14642067711429658547351208475966436.778 +110604608 -1216781788 bvq kfq xje 1989-05-29 15:10:37.663303000 1939-03-04 17:50:10.430653000 11862848568837770686462487909802619.797 +1555760998 215201660 lgb fau kfq 1944-05-19 03:08:32.674366000 1938-12-29 14:03:44.995783000 1958066499191083888327407709705411.615 +-1401727202 214808438 cni pkf wrm 1966-04-28 
11:18:41.163349000 1963-06-26 10:52:34.771336000 -13709603664100688649115751833868066.728 +-1321116692 1646595280 wrd grm kfa 1965-01-19 15:11:54.143706000 1987-11-29 12:21:52.464881000 -16478438133750971081048148091842568.636 +124236304 -1216519640 hcn bvh grm 1993-02-12 21:40:14.708595000 1987-09-25 08:35:27.030011000 16141690988778044624782273328841934.541 +1461649866 -1216912862 oje lgr sny 1978-07-12 05:51:58.590731000 1939-02-19 17:04:53.343679000 -20783242434130776450165474871027741.946 +1542260376 215070586 sey sni gbm 1940-09-14 21:24:12.716049000 1963-07-22 12:23:08.945284000 18988411020512714205744601431384983.791 +-1307484996 1646857428 doj wid sni 1968-10-05 21:41:31.188998000 1963-05-18 08:36:43.510414000 -12199595713810697142728362672803253.892 +30125172 1646464206 kfq hsn oau 1954-03-04 05:53:15.071133000 1987-11-16 11:36:35.377907000 14683606720132908836627274652199810.482 +110735682 -1216650714 fau oju cni 1989-06-11 15:55:54.750277000 1939-03-17 18:35:27.517627000 11904387577541020975738554086035993.501 +1555892072 215332734 pkf jey oju 1944-06-01 03:53:49.761340000 1939-01-11 14:49:02.082757000 1999605507894334177603473885938785.319 +-1401596128 214939512 grm \N bvq 1966-05-11 12:03:58.250323000 1963-07-09 11:37:51.858310000 -13668064655397438359839685657634693.024 +-1320985618 1646726354 bvh toj oje 1965-02-01 15:57:11.230680000 1987-12-12 13:07:09.551855000 -16436899125047720791772081915609194.932 +124367378 -1216388566 lgr kvq kvq 1993-02-25 22:25:31.795569000 1987-10-08 09:20:44.116985000 16183229997481294914058339505075308.245 +1461780940 -1216781788 sni fal wrd 1978-07-25 06:37:15.677705000 1939-03-04 17:50:10.430653000 -20741703425427526160889408694794368.242 +1542391450 215201660 wid pkv kfq 1940-09-27 22:09:29.803023000 1938-12-29 14:03:44.995783000 19029950029215964495020667607618357.495 +-1307353922 1646988502 hsn wrm wrm 1968-10-18 22:26:48.275971000 1963-05-31 09:22:00.597388000 -12158056705107446853452296496569880.188 +30256246 1646595280 oju bmh sey 1954-03-17 06:38:32.158107000 1987-11-29 12:21:52.464881000 14725145728836159125903340828433184.186 +110866756 -1216519640 jey lwr grm 1989-06-24 16:41:11.837251000 1987-09-25 08:35:27.030011000 11945926586244271265014620262269367.205 +1556023146 215463808 toj sny sny 1944-06-14 04:39:06.848314000 1939-01-24 15:34:19.169731000 2041144516597584466879540062172159.023 +-1401465054 215070586 kvq nid fau 1966-05-24 12:49:15.337297000 1963-07-22 12:23:08.945284000 -13626525646694188070563619481401319.32 +-1320854544 1646857428 fal xsn sni 1965-02-14 16:42:28.317654000 1963-05-18 08:36:43.510414000 -16395360116344470502496015739375821.228 +124498452 -1216257492 pkv oau oau 1993-03-10 23:10:48.882543000 1987-10-21 10:06:01.203959000 16224769006184545203334405681308681.949 +1461912014 -1216650714 wrm jep bvh 1978-08-07 07:22:32.764679000 1939-03-17 18:35:27.517627000 -20700164416724275871613342518560994.538 +1542522524 215332734 bmh toa oju 1940-10-10 22:54:46.889997000 1939-01-11 14:49:02.082757000 19071489037919214784296733783851731.199 +-1307222848 214677364 lwr bvq bvq 1968-10-31 23:12:05.362945000 1963-06-13 10:07:17.684362000 -12116517696404196564176230320336506.484 +30387320 1646726354 sny fql \N 1954-03-30 07:23:49.245081000 1987-12-12 13:07:09.551855000 14766684737539409415179407004666557.89 +110997830 -1216388566 nid pbv gbv 1989-07-07 17:26:28.924225000 1987-10-08 09:20:44.116985000 11987465594947521554290686438502740.909 +\N -1217043936 xsn wrd kvq 1944-06-27 05:24:23.935288000 1939-02-06 16:19:36.256705000 
2082683525300834756155606238405532.727 +1556154220 215201660 oau upb wrd 1966-06-06 13:34:32.424271000 1938-12-29 14:03:44.995783000 -13584986637990937781287553305167945.616 +-1401333980 1646988502 jep cwr cwi 1965-02-27 17:27:45.404628000 1963-05-31 09:22:00.597388000 -16353821107641220213219949563142447.524 +-1320723470 1646333132 toa sey wrm 1993-03-23 23:56:05.969517000 1987-11-03 10:51:18.290933000 16266308014887795492610471857542055.653 +124629526 -1216519640 bvq qcw sey 1978-08-20 08:07:49.851653000 1987-09-25 08:35:27.030011000 -20658625408021025582337276342327620.834 +1462043088 215463808 fql xse xje 1940-10-23 23:40:03.976971000 1939-01-24 15:34:19.169731000 19113028046622465073572799960085104.903 +1542653598 214808438 pbv fau sny 1968-11-13 23:57:22.449919000 1963-06-26 10:52:34.771336000 -12074978687700946274900164144103132.78 +-1307091774 1646857428 wrd dxs fau 1954-04-12 08:09:06.332055000 1963-05-18 08:36:43.510414000 14808223746242659704455473180899931.594 +30518394 -1216257492 rmh tfa kfa 1989-07-20 18:11:46.011199000 1987-10-21 10:06:01.203959000 12029004603650771843566752614736114.613 +111128904 -1216912862 cwr bvh oau 1944-07-10 06:09:41.022262000 1939-02-19 17:04:53.343679000 2124222534004085045431672414638906.431 +1556285294 215332734 sey ytf bvh 1966-06-19 14:19:49.511245000 1939-01-11 14:49:02.082757000 -13543447629287687492011487128934571.912 +-1401202906 214677364 nit gbv gbm 1965-03-12 18:13:02.491602000 1963-06-13 10:07:17.684362000 -16312282098937969923943883386909073.82 +-1320592396 1646464206 xse wid bvq 1993-04-06 00:41:23.056491000 1987-11-16 11:36:35.377907000 16307847023591045781886538033775429.357 +124760600 -1216388566 fau ugb gbv 1978-09-02 08:53:06.938627000 1987-10-08 09:20:44.116985000 -20617086399317775293061210166094247.13 +1462174162 -1217043936 jup cwi cni 1940-11-06 00:25:21.063945000 1939-02-06 16:19:36.256705000 19154567055325715362848866136318478.607 +1542784672 214939512 tfa jey wrd 1968-11-27 00:42:39.536893000 1963-07-09 11:37:51.858310000 -12033439678997695985624097967869759.076 +-1306960700 1646988502 bvh hcw cwi 1954-04-25 08:54:23.419029000 1963-05-31 09:22:00.597388000 14849762754945909993731539357133305.298 +30649468 1646333132 vql xje oje 1989-08-02 18:57:03.098173000 1987-11-03 10:51:18.290933000 12070543612354022132842818790969488.317 +111259978 -1216781788 gbv fal sey 1944-07-23 06:54:58.109236000 1939-03-04 17:50:10.430653000 2165761542707335334707738590872280.135 +1556416368 215463808 wid dxj xje 1966-07-02 15:05:06.598219000 1939-01-24 15:34:19.169731000 -13501908620584437202735420952701198.208 +-1401071832 214808438 rmx kfa kfq 1965-03-25 18:58:19.578576000 1963-06-26 10:52:34.771336000 -16270743090234719634667817210675700.116 +-1320461322 1646595280 cwi bmh fau 1993-04-19 01:26:40.143465000 1987-11-29 12:21:52.464881000 16349386032294296071162604210008803.061 +124891674 -1216257492 jey ykf kfa 1978-09-15 09:38:24.025600000 1987-10-21 10:06:01.203959000 -20575547390614525003785143989860873.426 +1462305236 -1216912862 nyt gbm grm 1940-11-19 01:10:38.150918000 1939-02-19 17:04:53.343679000 19196106064028965652124932312551852.311 +1542915746 215070586 xje nid bvh 1968-12-10 01:27:56.623867000 1963-07-22 12:23:08.945284000 -11991900670294445696348031791636385.372 +-1306829626 214677364 fal lgb gbm 1954-05-08 09:39:40.506003000 1963-06-13 10:07:17.684362000 14891301763649160283007605533366679.002 +30780542 1646464206 aup cni sni 1989-08-15 19:42:20.185147000 1987-11-16 11:36:35.377907000 12112082621057272422118884967202862.021 
+111391052 \N kfa jep gbv 1944-08-05 07:40:15.196210000 1939-03-17 18:35:27.517627000 2207300551410585623983804767105653.839 +1556547442 -1216650714 bmh hcn cni 1966-07-15 15:50:23.685193000 1939-02-06 16:19:36.256705000 -13460369611881186913459354776467824.504 +-1400940758 -1217043936 vqc oje oju 1965-04-07 19:43:36.665550000 1963-07-09 11:37:51.858310000 -16229204081531469345391751034442326.412 +-1320330248 214939512 gbm fql cwi 1993-05-02 02:11:57.230438000 1987-12-12 13:07:09.551855000 16390925040997546360438670386242176.765 +125022748 1646726354 nid doj oje 1978-09-28 10:23:41.112574000 1987-11-03 10:51:18.290933000 -20534008381911274714509077813627499.722 +1462436310 1646333132 rdx kfq kvq 1940-12-02 01:55:55.237892000 1939-03-04 17:50:10.430653000 19237645072732215941400998488785226.015 +1543046820 -1216781788 cni rmh xje 1968-12-23 02:13:13.710841000 1938-12-29 14:03:44.995783000 -11950361661591195407071965615403011.668 +-1306698552 215201660 jep pkf kfq 1954-05-21 10:24:57.592977000 1963-06-26 10:52:34.771336000 14932840772352410572283671709600052.706 +30911616 214808438 eyt grm wrm 1989-08-28 20:27:37.272121000 1987-11-29 12:21:52.464881000 12153621629760522711394951143436235.725 +111522126 1646595280 oje nit kfa 1944-08-18 08:25:32.283184000 1987-09-25 08:35:27.030011000 2248839560113835913259870943339027.543 +1556678516 -1216519640 fql lgr grm 1966-07-28 16:35:40.772167000 1939-02-19 17:04:53.343679000 -13418830603177936624183288600234450.8 +-1400809684 -1216912862 aug sni sny 1965-04-20 20:28:53.752524000 1963-07-22 12:23:08.945284000 -16187665072828219056115684858208952.708 +-1320199174 215070586 kfq jup gbm 1993-05-15 02:57:14.317412000 1963-05-18 08:36:43.510414000 16432464049700796649714736562475550.469 +125153822 1646857428 rmh hsn sni 1978-10-11 11:08:58.199548000 1987-11-16 11:36:35.377907000 -20492469373208024425233011637394126.018 +1462567384 1646464206 vhc oju oau 1940-12-15 02:41:12.324866000 1939-03-17 18:35:27.517627000 19279184081435466230677064665018599.719 +1543177894 -1216650714 grm vql cni 1969-01-05 02:58:30.797815000 1939-01-11 14:49:02.082757000 -11908822652887945117795899439169637.964 +-1306567478 215332734 nit toj oju 1954-06-03 11:10:14.679951000 1963-07-09 11:37:51.858310000 14974379781055660861559737885833426.41 +31042690 214939512 idx kvq bvq 1989-09-10 21:12:54.359095000 1987-12-12 13:07:09.551855000 12195160638463773000671017319669609.429 +111653200 1646726354 sni rmx oje 1944-08-31 09:10:49.370158000 1987-10-08 09:20:44.116985000 2290378568817086202535937119572401.247 +1556809590 -1216388566 jup pkv kvq 1966-08-10 17:20:57.859141000 1939-03-04 17:50:10.430653000 -13377291594474686334907222424001077.096 +-1400678610 -1216781788 eyk wrm wrd 1965-05-03 21:14:10.839498000 1938-12-29 14:03:44.995783000 -16146126064124968766839618681975579.004 +-1320068100 215201660 oju nyt kfq 1993-05-28 03:42:31.404386000 1963-05-31 09:22:00.597388000 16474003058404046938990802738708924.173 +125284896 1646988502 vql lwr wrm 1978-10-24 11:54:15.286522000 1987-11-29 12:21:52.464881000 -20450930364504774135956945461160752.314 +1462698458 1646595280 alg sny sey 1940-12-28 03:26:29.411840000 1987-09-25 08:35:27.030011000 19320723090138716519953130841251973.423 +1543308968 -1216519640 kvq aup grm 1969-01-18 03:43:47.884789000 1939-01-24 15:34:19.169731000 -11867283644184694828519833262936264.26 +-1306436404 215463808 rmx xsn sny 1954-06-16 11:55:31.766925000 1963-07-22 12:23:08.945284000 15015918789758911150835804062066800.114 +31173764 215070586 mhc oau fau 1989-09-23 
21:58:11.446069000 1963-05-18 08:36:43.510414000 12236699647167023289947083495902983.133 +111784274 1646857428 wrm vqc sni 1944-09-13 09:56:06.457132000 1987-10-21 10:06:01.203959000 2331917577520336491812003295805774.951 +1556940664 -1216257492 nyt toa oau 1966-08-23 18:06:14.946115000 1939-03-17 18:35:27.517627000 -13335752585771436045631156247767703.392 +-1400547536 -1216650714 ido bvq bvh 1965-05-16 21:59:27.926472000 1939-01-11 14:49:02.082757000 -16104587055421718477563552505742205.3 +-1319937026 215332734 sny rdx oju 1993-06-10 04:27:48.491360000 1963-06-13 10:07:17.684362000 16515542067107297228266868914942297.877 +125415970 214677364 aup pbv bvq 1978-11-06 12:39:32.373496000 1987-12-12 13:07:09.551855000 -20409391355801523846680879284927378.61 +1462829532 1646726354 epk wrd gbv 1941-01-10 04:11:46.498814000 1987-10-08 09:20:44.116985000 19362262098841966809229197017485347.127 +1543440042 -1216388566 oau eyt kvq 1969-01-31 04:29:04.971763000 1939-02-06 16:19:36.256705000 -11825744635481444539243767086702890.556 +-1306305330 -1217043936 vqc cwr wrd 1954-06-29 12:40:48.853899000 1938-12-29 14:03:44.995783000 15057457798462161440111870238300173.818 +31304838 215201660 qlg sey cwi 1989-10-06 22:43:28.533043000 1963-05-31 09:22:00.597388000 12278238655870273579223149672136356.837 +111915348 1646988502 bvq aug wrm 1944-09-26 10:41:23.544106000 1987-11-03 10:51:18.290933000 2373456586223586781088069472039148.655 +1557071738 1646333132 rdx xse sey 1966-09-05 18:51:32.033089000 1987-09-25 08:35:27.030011000 -13294213577068185756355090071534329.688 +-1400416462 -1216519640 mhs fau xje 1965-05-29 22:44:45.013446000 1939-01-24 15:34:19.169731000 -16063048046718468188287486329508831.596 +-1319805952 215463808 wrd vhc sny 1993-06-23 05:13:05.578334000 1963-06-26 10:52:34.771336000 16557081075810547517542935091175671.581 +125547044 214808438 eyt tfa fau 1978-11-19 13:24:49.460470000 1963-05-18 08:36:43.510414000 -20367852347098273557404813108694004.906 +1462960606 1646857428 ito bvh kfa 1941-01-23 04:57:03.585788000 1987-10-21 10:06:01.203959000 19403801107545217098505263193718720.831 +\N -1216257492 sey idx oau 1969-02-13 05:14:22.058737000 1939-02-19 17:04:53.343679000 -11784205626778194249967700910469516.852 +1543571116 -1216912862 aug gbv bvh 1954-07-12 13:26:05.940873000 1939-01-11 14:49:02.082757000 15098996807165411729387936414533547.522 +-1306174256 215332734 upk wid gbm 1989-10-19 23:28:45.620017000 1963-06-13 10:07:17.684362000 12319777664573523868499215848369730.541 +31435912 214677364 fau \N bvq 1944-10-09 11:26:40.631080000 1987-11-16 11:36:35.377907000 2414995594926837070364135648272522.359 +112046422 1646464206 vhc eyk gbv 1966-09-18 19:36:49.120063000 1987-10-08 09:20:44.116985000 -13252674568364935467079023895300955.984 +1557202812 -1216388566 qlw cwi cni 1965-06-11 23:30:02.100420000 1939-02-06 16:19:36.256705000 -16021509038015217899011420153275457.892 +\N -1217043936 bvh jey wrd 1993-07-06 05:58:22.665308000 1963-07-09 11:37:51.858310000 16598620084513797806819001267409045.285 +-1400285388 214939512 idx alg cwi 1978-12-02 14:10:06.547444000 1963-05-31 09:22:00.597388000 -20326313338395023268128746932460631.202 +-1319674878 1646988502 mxs xje oje 1941-02-05 05:42:20.672762000 1987-11-03 10:51:18.290933000 19445340116248467387781329369952094.535 +125678118 1646333132 wid fal sey 1969-02-26 05:59:39.145711000 1939-03-04 17:50:10.430653000 -11742666618074943960691634734236143.148 +1463091680 -1216781788 eyk mhc xje 1954-07-25 14:11:23.027847000 1939-01-24 15:34:19.169731000 
15140535815868662018664002590766921.226 +1543702190 215463808 yto kfa kfq 1989-11-02 00:14:02.706991000 1963-06-26 10:52:34.771336000 12361316673276774157775282024603104.245 +\N 214808438 jey bmh fau 1944-10-22 12:11:57.718054000 1987-11-29 12:21:52.464881000 2456534603630087359640201824505896.063 +-1306043182 1646595280 alg ido kfa 1966-10-01 20:22:06.207037000 1987-10-21 10:06:01.203959000 -13211135559661685177802957719067582.28 +31566986 -1216257492 upb gbm grm 1965-06-25 00:15:19.187394000 1939-02-19 17:04:53.343679000 -15979970029311967609735353977042084.188 +112177496 -1216912862 fal nid bvh 1993-07-19 06:43:39.752282000 1963-07-22 12:23:08.945284000 16640159093217048096095067443642418.989 +1557333886 215070586 mhc epk gbm 1978-12-15 14:55:23.634418000 1963-06-13 10:07:17.684362000 -20284774329691772978852680756227257.498 +-1400154314 214677364 qcw cni sni 1941-02-18 06:27:37.759736000 1987-11-16 11:36:35.377907000 19486879124951717677057395546185468.239 +\N 1646464206 bmh jep gbv 1969-03-11 06:44:56.232685000 1939-03-17 18:35:27.517627000 -11701127609371693671415568558002769.444 +-1319543804 -1216650714 ido qlg cni 1954-08-07 14:56:40.114821000 1939-02-06 16:19:36.256705000 15182074824571912307940068767000294.93 +125809192 -1217043936 dxs oje oju 1989-11-15 00:59:19.793965000 1963-07-09 11:37:51.858310000 12402855681980024447051348200836477.949 +1463222754 214939512 nid fql cwi 1944-11-04 12:57:14.805028000 1987-12-12 13:07:09.551855000 2498073612333337648916268000739269.767 +1543833264 1646726354 epk mhs oje 1966-10-14 21:07:23.294011000 \N -13169596550958434888526891542834208.576 +-1305912108 1646333132 ytf kfq kvq 1965-07-08 01:00:36.274368000 1987-11-03 10:51:18.290933000 -15938431020608717320459287800808710.484 +\N -1216781788 jep rmh xje 1993-08-01 07:28:56.839256000 1939-03-04 17:50:10.430653000 16681698101920298385371133619875792.693 +31698060 215201660 qlg ito kfq 1978-12-28 15:40:40.721392000 1938-12-29 14:03:44.995783000 -20243235320988522689576614579993883.794 +112308570 214808438 ugb grm wrm 1941-03-03 07:12:54.846710000 1963-06-26 10:52:34.771336000 19528418133654967966333461722418841.943 +1557464960 1646595280 fql nit kfa 1969-03-24 07:30:13.319659000 1987-11-29 12:21:52.464881000 -11659588600668443382139502381769395.74 +-1400023240 -1216519640 mhs upk grm 1954-08-20 15:41:57.201795000 1987-09-25 08:35:27.030011000 15223613833275162597216134943233668.634 +-1319412730 -1216912862 hcw sni sny 1989-11-28 01:44:36.880939000 1939-02-19 17:04:53.343679000 12444394690683274736327414377069851.653 +\N 215070586 rmh jup gbm 1944-11-17 13:42:31.892002000 1963-07-22 12:23:08.945284000 2539612621036587938192334176972643.471 +125940266 1646857428 ito qlw sni 1966-10-27 21:52:40.380984000 1963-05-18 08:36:43.510414000 -13128057542255184599250825366600834.872 +1463353828 1646464206 dxj oju oau 1965-07-21 01:45:53.361341000 1987-11-16 11:36:35.377907000 -15896892011905467031183221624575336.78 +1543964338 -1216650714 nit vql cni 1993-08-14 08:14:13.926230000 1939-03-17 18:35:27.517627000 16723237110623548674647199796109166.397 +-1305781034 215332734 upk mxs oju 1979-01-10 16:25:57.808366000 1939-01-11 14:49:02.082757000 -20201696312285272400300548403760510.09 +31829134 214939512 ykf kvq bvq 1941-03-16 07:58:11.933684000 1963-07-09 11:37:51.858310000 19569957142358218255609527898652215.647 +\N 1646726354 jup rmx oje 1969-04-06 08:15:30.406633000 1987-12-12 13:07:09.551855000 -11618049591965193092863436205536022.036 +112439644 -1216388566 qlw yto kvq 1954-09-02 16:27:14.288769000 
1987-10-08 09:20:44.116985000 15265152841978412886492201119467042.338 +1557596034 -1216781788 lgb wrm wrd 1989-12-11 02:29:53.967913000 1939-03-04 17:50:10.430653000 12485933699386525025603480553303225.357 +-1399892166 215201660 vql nyt kfq 1944-11-30 14:27:48.978976000 1938-12-29 14:03:44.995783000 2581151629739838227468400353206017.175 +-1319281656 1646988502 mxs upb wrm 1966-11-09 22:37:57.467958000 1963-05-31 09:22:00.597388000 -13086518533551934309974759190367461.168 +126071340 1646595280 hcn sny sey 1965-08-03 02:31:10.448315000 1987-11-29 12:21:52.464881000 -15855353003202216741907155448341963.076 +\N -1216519640 rmx aup grm 1993-08-27 08:59:31.013204000 1987-09-25 08:35:27.030011000 16764776119326798963923265972342540.101 +1463484902 215463808 yto qcw sny 1979-01-23 17:11:14.895340000 1939-01-24 15:34:19.169731000 -20160157303582022111024482227527136.386 +1544095412 215070586 doj oau fau 1941-03-29 08:43:29.020658000 1963-07-22 12:23:08.945284000 19611496151061468544885594074885589.351 +-1305649960 1646857428 nyt vqc sni 1969-04-19 09:00:47.493607000 1963-05-18 08:36:43.510414000 -11576510583261942803587370029302648.332 +31960208 -1216257492 upb dxs oau 1954-09-15 17:12:31.375743000 1987-10-21 10:06:01.203959000 15306691850681663175768267295700416.042 +112570718 -1216650714 pkf bvq bvh 1989-12-24 03:15:11.054887000 1939-03-17 18:35:27.517627000 12527472708089775314879546729536599.061 +\N 215332734 aup rdx oju 1944-12-13 15:13:06.065949000 1939-01-11 14:49:02.082757000 2622690638443088516744466529439390.879 +1557727108 214677364 qcw ytf bvq 1966-11-22 23:23:14.554932000 1963-06-13 10:07:17.684362000 -13044979524848684020698693014134087.464 +-1399761092 1646726354 lgr wrd gbv 1965-08-16 03:16:27.535289000 1987-12-12 13:07:09.551855000 -15813813994498966452631089272108589.372 +-1319150582 -1216388566 vqc eyt kvq 1993-09-09 09:44:48.100178000 1987-10-08 09:20:44.116985000 16806315128030049253199332148575913.805 +126202414 -1217043936 dxs ugb wrd 1979-02-05 17:56:31.982314000 1939-02-06 16:19:36.256705000 -20118618294878771821748416051293762.682 +1463615976 215201660 hsn sey cwi 1941-04-11 09:28:46.107632000 1938-12-29 14:03:44.995783000 19653035159764718834161660251118963.055 +\N 1646988502 rdx aug wrm 1969-05-02 09:46:04.580581000 1963-05-31 09:22:00.597388000 -11534971574558692514311303853069274.628 +1544226486 1646333132 ytf hcw sey 1954-09-28 17:57:48.462717000 1987-11-03 10:51:18.290933000 15348230859384913465044333471933789.746 +-1305518886 -1216519640 toj fau xje 1990-01-06 04:00:28.141861000 1987-09-25 08:35:27.030011000 12569011716793025604155612905769972.765 +32091282 215463808 eyt vhc sny 1944-12-26 15:58:23.152923000 1939-01-24 15:34:19.169731000 2664229647146338806020532705672764.583 +112701792 214808438 ugb dxj fau 1966-12-06 00:08:31.641906000 1963-06-26 10:52:34.771336000 -13003440516145433731422626837900713.76 +1557858182 1646857428 pkv bvh kfa 1965-08-29 04:01:44.622263000 1963-05-18 08:36:43.510414000 -15772274985795716163355023095875215.668 +\N -1216257492 aug idx oau 1993-09-22 10:30:05.187152000 1987-10-21 10:06:01.203959000 16847854136733299542475398324809287.509 +-1399630018 -1216912862 hcw ykf bvh 1979-02-18 18:41:49.069288000 1939-02-19 17:04:53.343679000 -20077079286175521532472349875060388.978 +-1319019508 215332734 lwr wid gbm 1941-04-24 10:14:03.194606000 1939-01-11 14:49:02.082757000 19694574168467969123437726427352336.759 +126333488 214677364 vhc eyk bvq 1969-05-15 10:31:21.667555000 1963-06-13 10:07:17.684362000 -11493432565855442225035237676835900.924 
+1463747050 1646464206 dxj lgb gbv 1954-10-11 18:43:05.549691000 1987-11-16 11:36:35.377907000 15389769868088163754320399648167163.45 +1544357560 -1216388566 xsn jey cni 1990-01-19 04:45:45.228835000 1987-10-08 09:20:44.116985000 12610550725496275893431679082003346.469 +\N -1217043936 idx alg wrd 1945-01-08 16:43:40.239897000 1939-02-06 16:19:36.256705000 2705768655849589095296598881906138.287 +-1305387812 214939512 ykf hcn cwi 1966-12-19 00:53:48.728880000 1963-07-09 11:37:51.858310000 -12961901507442183442146560661667340.056 +32222356 1646988502 \N fal oje 1965-09-11 04:47:01.709237000 1963-05-31 09:22:00.597388000 -15730735977092465874078956919641841.964 +112832866 1646333132 toa mhc sey 1993-10-05 11:15:22.274126000 1987-11-03 10:51:18.290933000 16889393145436549831751464501042661.213 +1557989256 -1216781788 eyk doj xje 1979-03-03 19:27:06.156262000 1939-03-04 17:50:10.430653000 -20035540277472271243196283698827015.274 +-1399498944 215463808 lgb bmh kfq 1941-05-07 10:59:20.281580000 1939-01-24 15:34:19.169731000 19736113177171219412713792603585710.463 +\N 214808438 pbv ido fau 1969-05-28 11:16:38.754529000 1963-06-26 10:52:34.771336000 -11451893557152191935759171500602527.22 +-1318888434 1646595280 alg pkf kfa 1954-10-24 19:28:22.636665000 1987-11-29 12:21:52.464881000 15431308876791414043596465824400537.154 +126464562 -1216257492 hcn nid grm 1990-02-01 05:31:02.315808000 1987-10-21 10:06:01.203959000 12652089734199526182707745258236720.173 +1463878124 -1216912862 cwr epk bvh 1945-01-21 17:28:57.326871000 1939-02-19 17:04:53.343679000 2747307664552839384572665058139511.991 +1544488634 215070586 mhc lgr gbm 1967-01-01 01:39:05.815854000 1963-07-22 12:23:08.945284000 -12920362498738933152870494485433966.352 +-1305256738 214677364 doj jep sni 1965-09-24 05:32:18.796211000 1963-06-13 10:07:17.684362000 -15689196968389215584802890743408468.26 +\N 1646464206 xse qlg gbv 1993-10-18 12:00:39.361100000 1987-11-16 11:36:35.377907000 16930932154139800121027530677276034.917 +32353430 -1216650714 ido hsn cni 1979-03-16 20:12:23.243236000 1939-03-17 18:35:27.517627000 -19994001268769020953920217522593641.57 +112963940 -1217043936 pkf fql oju 1941-05-20 11:44:37.368554000 1939-02-06 16:19:36.256705000 19777652185874469701989858779819084.167 +1558120330 214939512 tfa mhs cwi 1969-06-10 12:01:55.841503000 1963-07-09 11:37:51.858310000 -11410354548448941646483105324369153.516 +-1399367870 1646726354 epk toj oje 1954-11-06 20:13:39.723639000 1987-12-12 13:07:09.551855000 15472847885494664332872532000633910.858 +-1318757360 1646333132 lgr rmh kvq 1990-02-14 06:16:19.402782000 1987-11-03 10:51:18.290933000 12693628742902776471983811434470093.877 +\N -1216781788 gbv ito xje 1945-02-03 18:14:14.413845000 1939-03-04 17:50:10.430653000 2788846673256089673848731234372885.695 +126595636 215201660 qlg pkv kfq 1967-01-14 02:24:22.902828000 1938-12-29 14:03:44.995783000 -12878823490035682863594428309200592.648 +1464009198 214808438 hsn nit wrm 1965-10-07 06:17:35.883185000 1963-06-26 10:52:34.771336000 -15647657959685965295526824567175094.556 +1544619708 1646595280 cwi upk kfa 1993-10-31 12:45:56.448074000 1987-11-29 12:21:52.464881000 16972471162843050410303596853509408.621 +-1305125664 -1216519640 mhs lwr grm 1979-03-29 20:57:40.330210000 1987-09-25 08:35:27.030011000 -19952462260065770664644151346360267.866 +32484504 -1216912862 toj jup sny 1941-06-02 12:29:54.455528000 1939-02-19 17:04:53.343679000 19819191194577719991265924956052457.871 +\N 215070586 xje qlw gbm 1969-06-23 12:47:12.928477000 1963-07-22 
12:23:08.945284000 -11368815539745691357207039148135779.812 +113095014 1646857428 ito xsn sni 1954-11-19 20:58:56.810613000 1963-05-18 08:36:43.510414000 15514386894197914622148598176867284.562 +1558251404 1646464206 pkv vql oau 1990-02-27 07:01:36.489756000 1987-11-16 11:36:35.377907000 12735167751606026761259877610703467.581 +-1399236796 -1216650714 kfa mxs cni 1945-02-16 18:59:31.500819000 1939-03-17 18:35:27.517627000 2830385681959339963124797410606259.399 +-1318626286 215332734 upk toa oju 1967-01-27 03:09:39.989802000 1939-01-11 14:49:02.082757000 -12837284481332432574318362132967218.944 +126726710 214939512 lwr rmx bvq 1965-10-20 07:02:52.970159000 1963-07-09 11:37:51.858310000 -15606118950982715006250758390941720.852 +\N 1646726354 gbm yto oje 1993-11-13 13:31:13.535048000 1987-12-12 13:07:09.551855000 17014010171546300699579663029742782.325 +1464140272 -1216388566 qlw pbv kvq 1979-04-11 21:42:57.417184000 1987-10-08 09:20:44.116985000 -19910923251362520375368085170126894.162 +1544750782 -1216781788 xsn nyt wrd 1941-06-15 13:15:11.542502000 1939-03-04 17:50:10.430653000 19860730203280970280541991132285831.575 +-1304994590 215201660 cni upb kfq 1969-07-06 13:32:30.015451000 1938-12-29 14:03:44.995783000 -11327276531042441067930972971902406.108 +32615578 1646988502 mxs cwr wrm 1954-12-02 21:44:13.897587000 1963-05-31 09:22:00.597388000 15555925902901164911424664353100658.266 +113226088 1646595280 toa aup sey 1990-03-12 07:46:53.576730000 1987-11-29 12:21:52.464881000 12776706760309277050535943786936841.285 +\N -1216519640 oje qcw grm 1945-03-01 19:44:48.587793000 1987-09-25 08:35:27.030011000 2871924690662590252400863586839633.103 +1558382478 215463808 yto xse sny 1967-02-09 03:54:57.076776000 1939-01-24 15:34:19.169731000 -12795745472629182285042295956733845.24 +-1399105722 215070586 pbv vqc fau 1965-11-02 07:48:10.057133000 1963-07-22 12:23:08.945284000 -15564579942279464716974692214708347.148 +-1318495212 1646857428 kfq dxs sni 1993-11-26 14:16:30.622022000 1963-05-18 08:36:43.510414000 17055549180249550988855729205976156.029 +126857784 -1216257492 upb tfa oau 1979-04-24 22:28:14.504158000 1987-10-21 10:06:01.203959000 -19869384242659270086092018993893520.458 +1464271346 -1216650714 cwr rdx bvh 1941-06-28 14:00:28.629476000 1939-03-17 18:35:27.517627000 19902269211984220569818057308519205.279 +\N 215332734 grm ytf oju 1969-07-19 14:17:47.102425000 1939-01-11 14:49:02.082757000 -11285737522339190778654906795669032.404 +1544881856 214677364 qcw gbv bvq 1954-12-15 22:29:30.984561000 1963-06-13 10:07:17.684362000 15597464911604415200700730529334031.97 +-1304863516 1646726354 xse eyt gbv 1990-03-25 08:32:10.663704000 1987-12-12 13:07:09.551855000 12818245769012527339812009963170214.989 +32746652 -1216388566 sni ugb kvq 1945-03-14 20:30:05.674767000 1987-10-08 09:20:44.116985000 2913463699365840541676929763073006.807 +113357162 -1217043936 dxs cwi wrd 1967-02-22 04:40:14.163750000 1939-02-06 16:19:36.256705000 -12754206463925931995766229780500471.536 +1558513552 215201660 tfa aug cwi 1965-11-15 08:33:27.144107000 1938-12-29 14:03:44.995783000 -15523040933576214427698626038474973.444 +\N 1646988502 oju hcw wrm 1993-12-09 15:01:47.708996000 1963-05-31 09:22:00.597388000 17097088188952801278131795382209529.733 +-1398974648 1646333132 ytf xje sey 1979-05-07 23:13:31.591132000 1987-11-03 10:51:18.290933000 -19827845233956019796815952817660146.754 +-1318364138 -1216519640 gbv vhc xje 1941-07-11 14:45:45.716450000 1987-09-25 08:35:27.030011000 19943808220687470859094123484752578.983 +126988858 
215463808 kvq dxj sny 1969-08-01 15:03:04.189398000 1939-01-24 15:34:19.169731000 -11244198513635940489378840619435658.7 +1464402420 214808438 ugb kfa fau 1954-12-28 23:14:48.071534000 1963-06-26 10:52:34.771336000 15639003920307665489976796705567405.674 +1545012930 1646857428 cwi idx kfa 1990-04-07 09:17:27.750678000 1963-05-18 08:36:43.510414000 12859784777715777629088076139403588.693 +\N -1216257492 wrm ykf oau 1945-03-27 21:15:22.761741000 1987-10-21 10:06:01.203959000 2955002708069090830952995939306380.511 +-1304732442 -1216912862 hcw gbm bvh 1967-03-07 05:25:31.250724000 1939-02-19 17:04:53.343679000 -12712667455222681706490163604267097.832 +32877726 215332734 xje eyk gbm 1965-11-28 09:18:44.231081000 1939-01-11 14:49:02.082757000 -15481501924872964138422559862241599.74 +113488236 214677364 sny lgb bvq 1993-12-22 15:47:04.795970000 1963-06-13 10:07:17.684362000 17138627197656051567407861558442903.437 +1558644626 1646464206 dxj cni gbv 1979-05-20 23:58:48.678106000 1987-11-16 11:36:35.377907000 -19786306225252769507539886641426773.05 +-1398843574 -1216388566 kfa alg cni 1941-07-24 15:31:02.803424000 1987-10-08 09:20:44.116985000 19985347229390721148370189660985952.687 +\N -1217043936 oau hcn wrd 1969-08-14 15:48:21.276372000 1939-02-06 16:19:36.256705000 -11202659504932690200102774443202284.996 +-1318233064 214939512 ykf oje cwi 1955-01-11 00:00:05.158508000 1963-07-09 11:37:51.858310000 15680542929010915779252862881800779.378 +127119932 1646988502 gbm mhc oje 1990-04-20 10:02:44.837652000 1963-05-31 09:22:00.597388000 12901323786419027918364142315636962.397 +1464533494 1646333132 bvq doj sey 1945-04-09 22:00:39.848715000 1987-11-03 10:51:18.290933000 2996541716772341120229062115539754.215 +1545144004 -1216781788 lgb kfq xje 1967-03-20 06:10:48.337698000 1939-03-04 17:50:10.430653000 -12671128446519431417214097428033724.128 +-1304601368 215463808 cni ido kfq 1965-12-11 10:04:01.318055000 1939-01-24 15:34:19.169731000 -15439962916169713849146493686008226.036 +\N 214808438 wrd pkf fau 1994-01-04 16:32:21.882944000 1963-06-26 10:52:34.771336000 17180166206359301856683927734676277.141 +33008800 1646595280 hcn grm kfa 1979-06-03 00:44:05.765080000 1987-11-29 12:21:52.464881000 -19744767216549519218263820465193399.346 +113619310 -1216257492 oje epk grm 1941-08-06 16:16:19.890398000 1987-10-21 10:06:01.203959000 20026886238093971437646255837219326.391 +1558775700 -1216912862 sey lgr bvh 1969-08-27 16:33:38.363346000 1939-02-19 17:04:53.343679000 -11161120496229439910826708266968911.292 +-1398712500 215070586 doj sni gbm 1955-01-24 00:45:22.245482000 1963-07-22 12:23:08.945284000 15722081937714166068528929058034153.082 +-1318101990 214677364 kfq qlg sni 1990-05-03 10:48:01.924626000 1963-06-13 10:07:17.684362000 12942862795122278207640208491870336.101 +\N 1646464206 fau hsn gbv 1945-04-22 22:45:56.935689000 1987-11-16 11:36:35.377907000 3038080725475591409505128291773127.919 +127251006 -1216650714 pkf oju cni 1967-04-02 06:56:05.424672000 1939-03-17 18:35:27.517627000 -12629589437816181127938031251800350.424 +1464664568 -1217043936 grm mhs oju 1965-12-24 10:49:18.405029000 1939-02-06 16:19:36.256705000 -15398423907466463559870427509774852.332 +1545275078 214939512 bvh toj cwi 1994-01-17 17:17:38.969918000 1963-07-09 11:37:51.858310000 17221705215062552145959993910909650.845 +-1304470294 1646726354 lgr kvq oje 1979-06-16 01:29:22.852054000 1987-12-12 13:07:09.551855000 -19703228207846268928987754288960025.642 +33139874 1646333132 sni ito kvq 1941-08-19 17:01:36.977372000 1987-11-03 
10:51:18.290933000 20068425246797221726922322013452700.095 +\N -1216781788 wid pkv xje 1969-09-09 17:18:55.450320000 1939-03-04 17:50:10.430653000 -11119581487526189621550642090735537.588 +113750384 215201660 hsn wrm kfq 1955-02-06 01:30:39.332456000 1938-12-29 14:03:44.995783000 15763620946417416357804995234267526.786 +1558906774 214808438 oju upk wrm 1990-05-16 11:33:19.011600000 1963-06-26 10:52:34.771336000 12984401803825528496916274668103709.805 +-1398581426 1646595280 jey lwr kfa 1945-05-05 23:31:14.022663000 1987-11-29 12:21:52.464881000 3079619734178841698781194468006501.623 +-1317970916 -1216519640 toj sny grm 1967-04-15 07:41:22.511646000 1987-09-25 08:35:27.030011000 -12588050429112930838661965075566976.72 +127382080 -1216912862 kvq qlw sny 1966-01-06 11:34:35.492003000 1939-02-19 17:04:53.343679000 -15356884898763213270594361333541478.628 +\N 215070586 fal xsn gbm 1994-01-30 18:02:56.056892000 1963-07-22 12:23:08.945284000 17263244223765802435236060087143024.549 +1464795642 1646857428 pkv oau sni 1979-06-29 02:14:39.939028000 1963-05-18 08:36:43.510414000 -19661689199143018639711688112726651.938 +1545406152 1646464206 wrm mxs oau 1941-09-01 17:46:54.064346000 1987-11-16 11:36:35.377907000 20109964255500472016198388189686073.799 +-1304339220 -1216650714 bmh toa cni 1969-09-22 18:04:12.537294000 1939-03-17 18:35:27.517627000 -11078042478822939332274575914502163.884 +33270948 215332734 lwr bvq oju 1955-02-19 02:15:56.419430000 1939-01-11 14:49:02.082757000 15805159955120666647081061410500900.49 +113881458 214939512 sny yto bvq 1990-05-29 12:18:36.098574000 1963-07-09 11:37:51.858310000 13025940812528778786192340844337083.509 +\N 1646726354 nid pbv oje 1945-05-19 00:16:31.109637000 1987-12-12 13:07:09.551855000 3121158742882091988057260644239875.327 +1559037848 -1216388566 xsn wrd kvq 1967-04-28 08:26:39.598620000 1987-10-08 09:20:44.116985000 -12546511420409680549385898899333603.016 +-1398450352 -1216781788 oau upb wrd 1966-01-19 12:19:52.578977000 1939-03-04 17:50:10.430653000 -15315345890059962981318295157308104.924 +-1317839842 215201660 jep cwr kfq 1994-02-12 18:48:13.143865000 1938-12-29 14:03:44.995783000 17304783232469052724512126263376398.253 +127513154 1646988502 toa sey wrm 1979-07-12 02:59:57.026001000 1963-05-31 09:22:00.597388000 -19620150190439768350435621936493278.234 +1464926716 1646595280 bvq qcw sey 1941-09-14 18:32:11.151319000 1987-11-29 12:21:52.464881000 20151503264203722305474454365919447.503 +\N -1216519640 fql xse grm 1969-10-05 18:49:29.624268000 1987-09-25 08:35:27.030011000 -11036503470119689042998509738268790.18 +1545537226 215463808 pbv fau sny 1955-03-04 03:01:13.506404000 1939-01-24 15:34:19.169731000 15846698963823916936357127586734274.194 +-1304208146 215070586 wrd dxs fau 1990-06-11 13:03:53.185548000 1963-07-22 12:23:08.945284000 13067479821232029075468407020570457.213 +33402022 1646857428 rmh tfa sni 1945-06-01 01:01:48.196611000 1963-05-18 08:36:43.510414000 3162697751585342277333326820473249.031 +114012532 -1216257492 cwr bvh oau 1967-05-11 09:11:56.685594000 1987-10-21 10:06:01.203959000 -12504972411706430260109832723100229.312 +1559168922 -1216650714 sey ytf bvh 1966-02-01 13:05:09.665951000 1939-03-17 18:35:27.517627000 -15273806881356712692042228981074731.22 +\N 215332734 nit gbv oju 1994-02-25 19:33:30.230839000 1939-01-11 14:49:02.082757000 17346322241172303013788192439609771.957 +-1398319278 214677364 xse wid bvq 1979-07-25 03:45:14.112975000 1963-06-13 10:07:17.684362000 -19578611181736518061159555760259904.53 +-1317708768 1646726354 
fau ugb gbv 1941-09-27 19:17:28.238293000 1987-12-12 13:07:09.551855000 20193042272906972594750520542152821.207 +127644228 -1216388566 jup cwi kvq 1969-10-18 19:34:46.711242000 1987-10-08 09:20:44.116985000 -10994964461416438753722443562035416.476 +1465057790 -1217043936 tfa jey wrd 1955-03-17 03:46:30.593378000 1939-02-06 16:19:36.256705000 15888237972527167225633193762967647.898 +1545668300 215201660 bvh hcw cwi \N 1938-12-29 14:03:44.995783000 13109018829935279364744473196803830.917 +\N 1646988502 vql xje wrm 1990-06-24 13:49:10.272522000 1963-05-31 09:22:00.597388000 3204236760288592566609392996706622.735 +-1304077072 1646333132 gbv fal sey 1945-06-14 01:47:05.283585000 1987-11-03 10:51:18.290933000 -12463433403003179970833766546866855.608 +33533096 -1216519640 wid dxj xje 1967-05-24 09:57:13.772568000 1987-09-25 08:35:27.030011000 -15232267872653462402766162804841357.516 +114143606 215463808 rmx kfa sny 1966-02-14 13:50:26.752925000 1939-01-24 15:34:19.169731000 17387861249875553303064258615843145.661 +1559299996 214808438 cwi bmh fau 1994-03-10 20:18:47.317813000 1963-06-26 10:52:34.771336000 -19537072173033267771883489584026530.826 +-1398188204 1646857428 jey ykf kfa 1979-08-07 04:30:31.199949000 1963-05-18 08:36:43.510414000 20234581281610222884026586718386194.911 +\N -1216257492 nyt gbm oau 1941-10-10 20:02:45.325267000 1987-10-21 10:06:01.203959000 -10953425452713188464446377385802042.772 +-1317577694 -1216912862 xje nid bvh 1969-10-31 20:20:03.798216000 1939-02-19 17:04:53.343679000 15929776981230417514909259939201021.602 +127775302 215332734 fal lgb gbm 1955-03-30 04:31:47.680352000 1939-01-11 14:49:02.082757000 13150557838638529654020539373037204.621 +1465188864 214677364 aup cni bvq 1990-07-07 14:34:27.359496000 1963-06-13 10:07:17.684362000 3245775768991842855885459172939996.439 +1545799374 1646464206 kfa jep gbv 1945-06-27 02:32:22.370559000 1987-11-16 11:36:35.377907000 -12421894394299929681557700370633481.904 +-1303945998 -1216388566 bmh hcn cni 1967-06-06 10:42:30.859542000 1987-10-08 09:20:44.116985000 -15190728863950212113490096628607983.812 +\N -1217043936 vqc oje wrd 1966-02-27 14:35:43.839899000 1939-02-06 16:19:36.256705000 17429400258578803592340324792076519.365 +33664170 214939512 gbm fql cwi 1994-03-23 21:04:04.404787000 1963-07-09 11:37:51.858310000 -19495533164330017482607423407793157.122 +114274680 1646988502 nid doj oje 1979-08-20 05:15:48.286923000 1963-05-31 09:22:00.597388000 20276120290313473173302652894619568.615 +1559431070 1646333132 rdx kfq sey 1941-10-23 20:48:02.412241000 1987-11-03 10:51:18.290933000 -10911886444009938175170311209568669.068 +-1398057130 -1216781788 cni rmh xje 1969-11-13 21:05:20.885190000 1939-03-04 17:50:10.430653000 15971315989933667804185326115434395.306 +-1317446620 215463808 jep pkf kfq 1955-04-12 05:17:04.767326000 1939-01-24 15:34:19.169731000 13192096847341779943296605549270578.325 +\N 214808438 eyt grm fau 1990-07-20 15:19:44.446470000 1963-06-26 10:52:34.771336000 3287314777695093145161525349173370.143 +127906376 1646595280 oje nit kfa 1945-07-10 03:17:39.457533000 1987-11-29 12:21:52.464881000 -12380355385596679392281634194400108.2 +1465319938 -1216257492 fql lgr grm 1967-06-19 11:27:47.946516000 1987-10-21 10:06:01.203959000 -15149189855246961824214030452374610.108 +1545930448 -1216912862 aug sni bvh 1966-03-12 15:21:00.926873000 1939-02-19 17:04:53.343679000 17470939267282053881616390968309893.069 +-1303814924 215070586 \N jup gbm 1994-04-05 21:49:21.491761000 1963-07-22 12:23:08.945284000 
-19453994155626767193331357231559783.418 +33795244 214677364 kfq hsn sni 1979-09-02 06:01:05.373897000 1963-06-13 10:07:17.684362000 20317659299016723462578719070852942.319 +\N 1646464206 rmh oju gbv 1941-11-05 21:33:19.499215000 1987-11-16 11:36:35.377907000 -10870347435306687885894245033335295.364 +114405754 -1216650714 vhc vql cni 1969-11-26 21:50:37.972164000 1939-03-17 18:35:27.517627000 16012854998636918093461392291667769.01 +1559562144 -1217043936 grm toj oju 1955-04-25 06:02:21.854300000 1939-02-06 16:19:36.256705000 13233635856045030232572671725503952.029 +-1397926056 214939512 nit kvq cwi 1990-08-02 16:05:01.533444000 1963-07-09 11:37:51.858310000 3328853786398343434437591525406743.847 +-1317315546 1646726354 idx rmx oje 1945-07-23 04:02:56.544507000 1987-12-12 13:07:09.551855000 -12338816376893429103005568018166734.496 +128037450 1646333132 sni pkv kvq 1967-07-02 12:13:05.033490000 1987-11-03 10:51:18.290933000 -15107650846543711534937964276141236.404 +\N -1216781788 jup wrm xje 1966-03-25 16:06:18.013847000 1939-03-04 17:50:10.430653000 17512478275985304170892457144543266.773 +1465451012 215201660 eyk nyt kfq 1994-04-18 22:34:38.578735000 1938-12-29 14:03:44.995783000 -19412455146923516904055291055326409.714 +1546061522 214808438 oju lwr wrm 1979-09-15 06:46:22.460871000 1963-06-26 10:52:34.771336000 20359198307719973751854785247086316.023 +-1303683850 1646595280 vql sny kfa 1941-11-18 22:18:36.586189000 1987-11-29 12:21:52.464881000 -10828808426603437596618178857101921.66 +33926318 -1216519640 alg aup grm 1969-12-09 22:35:55.059138000 1987-09-25 08:35:27.030011000 16054394007340168382737458467901142.714 +114536828 -1216912862 kvq xsn sny 1955-05-08 06:47:38.941274000 1939-02-19 17:04:53.343679000 13275174864748280521848737901737325.733 +\N 215070586 rmx oau gbm 1990-08-15 16:50:18.620418000 1963-07-22 12:23:08.945284000 3370392795101593723713657701640117.551 +1559693218 1646857428 mhc vqc sni 1945-08-05 04:48:13.631481000 1963-05-18 08:36:43.510414000 -12297277368190178813729501841933360.792 +-1397794982 1646464206 wrm toa oau 1967-07-15 12:58:22.120464000 1987-11-16 11:36:35.377907000 -15066111837840461245661898099907862.7 +-1317184472 -1216650714 nyt bvq cni 1966-04-07 16:51:35.100821000 1939-03-17 18:35:27.517627000 17554017284688554460168523320776640.477 +128168524 215332734 ido rdx oju 1994-05-01 23:19:55.665709000 1939-01-11 14:49:02.082757000 -19370916138220266614779224879093036.01 +1465582086 214939512 sny pbv bvq 1979-09-28 07:31:39.547845000 1963-07-09 11:37:51.858310000 20400737316423224041130851423319689.727 +\N 1646726354 aup wrd oje 1941-12-01 23:03:53.673163000 1987-12-12 13:07:09.551855000 -10787269417900187307342112680868547.956 +1546192596 -1216388566 epk eyt kvq 1969-12-22 23:21:12.146112000 1987-10-08 09:20:44.116985000 16095933016043418672013524644134516.418 +-1303552776 -1216781788 oau cwr wrd 1955-05-21 07:32:56.028248000 1939-03-04 17:50:10.430653000 13316713873451530811124804077970699.437 +34057392 215201660 vqc sey kfq 1990-08-28 17:35:35.707392000 1938-12-29 14:03:44.995783000 3411931803804844012989723877873491.255 +114667902 1646988502 qlg aug wrm 1945-08-18 05:33:30.718455000 1963-05-31 09:22:00.597388000 -12255738359486928524453435665699987.088 +1559824292 1646595280 bvq xse sey 1967-07-28 13:43:39.207438000 1987-11-29 12:21:52.464881000 -15024572829137210956385831923674488.996 +\N -1216519640 rdx fau grm 1966-04-20 17:36:52.187795000 1987-09-25 08:35:27.030011000 17595556293391804749444589497010014.181 +-1397663908 215463808 mhs vhc sny 
1994-05-15 00:05:12.752683000 1939-01-24 15:34:19.169731000 -19329377129517016325503158702859662.306 +-1317053398 215070586 wrd tfa fau 1979-10-11 08:16:56.634819000 1963-07-22 12:23:08.945284000 20442276325126474330406917599553063.431 +128299598 1646857428 eyt bvh sni 1941-12-14 23:49:10.760137000 1963-05-18 08:36:43.510414000 -10745730409196937018066046504635174.252 +1465713160 -1216257492 ito idx oau 1933-06-24 00:08:04.626239000 1987-10-21 10:06:01.203959000 16137472024746668961289590820367890.122 +1546323670 -1216650714 sey gbv bvh 1955-06-03 08:18:13.115222000 1939-03-17 18:35:27.517627000 13358252882154781100400870254204073.141 +\N 215332734 aug wid oju 1990-09-10 18:20:52.794366000 1939-01-11 14:49:02.082757000 3453470812508094302265790054106864.959 +-1303421702 214677364 upk eyk bvq 1945-08-31 06:18:47.805429000 1963-06-13 10:07:17.684362000 -12214199350783678235177369489466613.384 +34188466 1646726354 fau cwi gbv 1967-08-10 14:28:56.294412000 1987-12-12 13:07:09.551855000 \N +114798976 -1216388566 vhc jey kvq 1966-05-03 18:22:09.274768000 1987-10-08 09:20:44.116985000 -14983033820433960667109765747441115.292 +1559955366 -1217043936 qlw alg wrd 1994-05-28 00:50:29.839657000 1939-02-06 16:19:36.256705000 17637095302095055038720655673243387.885 +-1397532834 215201660 bvh xje cwi 1979-10-24 09:02:13.721793000 1938-12-29 14:03:44.995783000 -19287838120813766036227092526626288.602 +\N 1646988502 idx fal wrm 1941-12-28 00:34:27.847111000 1963-05-31 09:22:00.597388000 20483815333829724619682983775786437.135 +-1316922324 1646333132 mxs mhc sey 1933-07-07 00:53:21.713213000 1987-11-03 10:51:18.290933000 -10704191400493686728789980328401800.548 +128430672 -1216519640 wid kfa xje 1955-06-16 09:03:30.202196000 1987-09-25 08:35:27.030011000 16179011033449919250565656996601263.826 +1465844234 215463808 eyk bmh sny 1990-09-23 19:06:09.881340000 1939-01-24 15:34:19.169731000 13399791890858031389676936430437446.845 +1546454744 214808438 yto ido fau 1945-09-13 07:04:04.892403000 1963-06-26 10:52:34.771336000 3495009821211344591541856230340238.663 +-1303290628 1646857428 jey gbm kfa 1967-08-23 15:14:13.381385000 1963-05-18 08:36:43.510414000 -12172660342080427945901303313233239.68 +\N -1216257492 alg nid oau 1966-05-16 19:07:26.361742000 1987-10-21 10:06:01.203959000 -14941494811730710377833699571207741.588 +34319540 -1216912862 upb epk bvh 1994-06-10 01:35:46.926631000 1939-02-19 17:04:53.343679000 17678634310798305327996721849476761.589 +114930050 215332734 fal cni gbm 1979-11-06 09:47:30.808767000 1939-01-11 14:49:02.082757000 -19246299112110515746951026350392914.898 +1560086440 214677364 mhc jep bvq 1942-01-10 01:19:44.934085000 1963-06-13 10:07:17.684362000 20525354342532974908959049952019810.839 +-1397401760 1646464206 qcw qlg gbv 1933-07-20 01:38:38.800187000 1987-11-16 11:36:35.377907000 -10662652391790436439513914152168426.844 +-1316791250 -1216388566 bmh oje cni 1955-06-29 09:48:47.289170000 1987-10-08 09:20:44.116985000 16220550042153169539841723172834637.53 +\N -1217043936 ido fql wrd 1990-10-06 19:51:26.968314000 1939-02-06 16:19:36.256705000 13441330899561281678953002606670820.549 +128561746 214939512 dxs mhs \N 1945-09-26 07:49:21.979376000 1963-07-09 11:37:51.858310000 3536548829914594880817922406573612.367 +1465975308 1646988502 nid kfq cwi 1967-09-05 15:59:30.468359000 1963-05-31 09:22:00.597388000 -12131121333377177656625237136999865.976 +1546585818 1646333132 epk rmh oje 1966-05-29 19:52:43.448716000 1987-11-03 10:51:18.290933000 -14899955803027460088557633394974367.884 
+-1303159554 -1216781788 ytf ito sey 1994-06-23 02:21:04.013605000 1939-03-04 17:50:10.430653000 17720173319501555617272788025710135.293 +34450614 215463808 jep grm xje 1979-11-19 10:32:47.895741000 1939-01-24 15:34:19.169731000 -19204760103407265457674960174159541.194 +\N 214808438 qlg nit kfq 1942-01-23 02:05:02.021059000 1963-06-26 10:52:34.771336000 20566893351236225198235116128253184.543 +115061124 1646595280 ugb upk fau 1933-08-02 02:23:55.887161000 1987-11-29 12:21:52.464881000 -10621113383087186150237847975935053.14 +1560217514 -1216257492 fql sni kfa 1955-07-12 10:34:04.376144000 1987-10-21 10:06:01.203959000 16262089050856419829117789349068011.234 +-1397270686 -1216912862 mhs jup grm 1990-10-19 20:36:44.055288000 1939-02-19 17:04:53.343679000 13482869908264531968229068782904194.253 +-1316660176 215070586 hcw qlw bvh 1945-10-09 08:34:39.066350000 1963-07-22 12:23:08.945284000 3578087838617845170093988582806986.071 +128692820 214677364 rmh oju gbm 1967-09-18 16:44:47.555333000 1963-06-13 10:07:17.684362000 -12089582324673927367349170960766492.272 +\N 1646464206 ito vql sni 1966-06-11 20:38:00.535690000 1987-11-16 11:36:35.377907000 -14858416794324209799281567218740994.18 +1466106382 -1216650714 dxj mxs gbv 1994-07-06 03:06:21.100579000 1939-03-17 18:35:27.517627000 17761712328204805906548854201943508.997 +1546716892 -1217043936 nit kvq cni 1979-12-02 11:18:04.982715000 1939-02-06 16:19:36.256705000 -19163221094704015168398893997926167.49 +-1303028480 214939512 upk rmx oju 1942-02-05 02:50:19.108033000 1963-07-09 11:37:51.858310000 20608432359939475487511182304486558.247 +34581688 1646726354 ykf yto cwi 1933-08-15 03:09:12.974135000 1987-12-12 13:07:09.551855000 -10579574374383935860961781799701679.436 +115192198 1646333132 jup wrm oje 1955-07-25 11:19:21.463118000 1987-11-03 10:51:18.290933000 16303628059559670118393855525301384.938 +\N -1216781788 qlw nyt kvq 1990-11-01 21:22:01.142262000 1939-03-04 17:50:10.430653000 13524408916967782257505134959137567.957 +1560348588 215201660 lgb upb xje 1945-10-22 09:19:56.153324000 1938-12-29 14:03:44.995783000 3619626847321095459370054759040359.775 +-1397139612 214808438 vql sny kfq 1967-10-01 17:30:04.642307000 1963-06-26 10:52:34.771336000 -12048043315970677078073104784533118.568 +-1316529102 1646595280 mxs aup wrm 1966-06-24 21:23:17.622664000 1987-11-29 12:21:52.464881000 -14816877785620959510005501042507620.476 +128823894 -1216519640 hcn qcw kfa 1994-07-19 03:51:38.187553000 1987-09-25 08:35:27.030011000 17803251336908056195824920378176882.701 +1466237456 -1216912862 rmx oau grm 1979-12-15 12:03:22.069689000 1939-02-19 17:04:53.343679000 -19121682086000764879122827821692793.786 +\N 215070586 yto vqc sny 1942-02-18 03:35:36.195007000 1963-07-22 12:23:08.945284000 20649971368642725776787248480719931.951 +1546847966 1646857428 doj dxs gbm 1933-08-28 03:54:30.061109000 1963-05-18 08:36:43.510414000 -10538035365680685571685715623468305.732 +-1302897406 1646464206 nyt bvq \N 1955-08-07 12:04:38.550092000 1987-11-16 11:36:35.377907000 16345167068262920407669921701534758.642 +34712762 -1216650714 upb rdx sni 1990-11-14 22:07:18.229236000 1939-03-17 18:35:27.517627000 13565947925671032546781201135370941.661 +115323272 215332734 pkf ytf oau 1945-11-04 10:05:13.240298000 1939-01-11 14:49:02.082757000 \N +1560479662 214939512 aup wrd cni 1967-10-14 18:15:21.729281000 1963-07-09 11:37:51.858310000 3661165856024345748646120935273733.479 +\N 1646726354 qcw eyt oju 1966-07-07 22:08:34.709638000 1987-12-12 13:07:09.551855000 
-12006504307267426788797038608299744.864 +-1397008538 -1216388566 lgr ugb bvq 1994-08-01 04:36:55.274527000 1987-10-08 09:20:44.116985000 -14775338776917709220729434866274246.772 +-1316398028 -1216781788 vqc sey oje 1979-12-28 12:48:39.156663000 1939-03-04 17:50:10.430653000 17844790345611306485100986554410256.405 +128954968 215201660 dxs aug kvq 1942-03-03 04:20:53.281981000 1938-12-29 14:03:44.995783000 -19080143077297514589846761645459420.082 +1466368530 1646988502 hsn hcw wrd 1933-09-10 04:39:47.148083000 1963-05-31 09:22:00.597388000 20691510377345976066063314656953305.655 +1546979040 1646595280 rdx fau kfq 1955-08-20 12:49:55.637066000 1987-11-29 12:21:52.464881000 -10496496356977435282409649447234932.028 +\N -1216519640 ytf vhc wrm 1990-11-27 22:52:35.316209000 1987-09-25 08:35:27.030011000 16386706076966170696945987877768132.346 +-1302766332 215463808 toj dxj sey 1945-11-17 10:50:30.327272000 1939-01-24 15:34:19.169731000 13607486934374282836057267311604315.365 +34843836 215070586 eyt bvh grm 1967-10-27 19:00:38.816255000 1963-07-22 12:23:08.945284000 3702704864727596037922187111507107.183 +115454346 1646857428 ugb idx sny 1966-07-20 22:53:51.796612000 1963-05-18 08:36:43.510414000 -11964965298564176499520972432066371.16 +1560610736 -1216257492 pkv ykf fau 1994-08-14 05:22:12.361501000 1987-10-21 10:06:01.203959000 -14733799768214458931453368690040873.068 +-1396877464 -1216650714 aug wid sni 1980-01-10 13:33:56.243637000 1939-03-17 18:35:27.517627000 17886329354314556774377052730643630.109 +\N 215332734 hcw eyk oau 1942-03-16 05:06:10.368955000 1939-01-11 14:49:02.082757000 -19038604068594264300570695469226046.378 +-1316266954 214677364 lwr lgb bvh 1933-09-23 05:25:04.235057000 1963-06-13 10:07:17.684362000 20733049386049226355339380833186679.359 +129086042 1646726354 vhc jey oju 1955-09-02 13:35:12.724040000 1987-12-12 13:07:09.551855000 -10454957348274184993133583271001558.324 +1466499604 -1216388566 dxj alg bvq 1990-12-10 23:37:52.403183000 1987-10-08 09:20:44.116985000 16428245085669420986222054054001506.05 +1547110114 -1217043936 xsn hcn gbv 1945-11-30 11:35:47.414246000 1939-02-06 16:19:36.256705000 13649025943077533125333333487837689.069 +-1302635258 215201660 idx fal kvq 1967-11-09 19:45:55.903229000 1938-12-29 14:03:44.995783000 3744243873430846327198253287740480.887 +\N 1646988502 ykf mhc wrd 1966-08-02 23:39:08.883586000 1963-05-31 09:22:00.597388000 -11923426289860926210244906255832997.456 +34974910 \N toa doj cwi 1994-08-27 06:07:29.448475000 1987-11-03 10:51:18.290933000 -14692260759511208642177302513807499.364 +115585420 1646333132 eyk bmh wrm 1980-01-23 14:19:13.330611000 1987-09-25 08:35:27.030011000 17927868363017807063653118906877003.813 +1560741810 -1216519640 lgb ido sey 1942-03-29 05:51:27.455929000 1939-01-24 15:34:19.169731000 -18997065059891014011294629292992672.674 +-1396746390 215463808 pbv pkf xje 1933-10-06 06:10:21.322031000 1963-06-26 10:52:34.771336000 20774588394752476644615447009420053.063 +-1316135880 214808438 alg nid sny 1955-09-15 14:20:29.811014000 1963-05-18 08:36:43.510414000 -10413418339570934703857517094768184.62 +\N 1646857428 hcn epk fau 1990-12-24 00:23:09.490157000 1987-10-21 10:06:01.203959000 16469784094372671275498120230234879.754 +129217116 -1216257492 cwr lgr kfa 1945-12-13 12:21:04.501220000 1939-02-19 17:04:53.343679000 13690564951780783414609399664071062.773 +1466630678 -1216912862 mhc jep oau 1967-11-22 20:31:12.990203000 1939-01-11 14:49:02.082757000 3785782882134096616474319463973854.591 +1547241188 215332734 doj qlg bvh 
1966-08-16 00:24:25.970560000 1963-06-13 10:07:17.684362000 -11881887281157675920968840079599623.752 +-1302504184 214677364 xse hsn gbm 1994-09-09 06:52:46.535449000 1987-11-16 11:36:35.377907000 -14650721750807958352901236337574125.66 +35105984 1646464206 ido fql bvq 1980-02-05 15:04:30.417585000 1987-10-08 09:20:44.116985000 17969407371721057352929185083110377.517 +\N -1216388566 \N mhs gbv 1942-04-11 06:36:44.542903000 1939-02-06 16:19:36.256705000 -18955526051187763722018563116759298.97 +115716494 -1217043936 pkf toj cni 1933-10-19 06:55:38.409005000 1963-07-09 11:37:51.858310000 20816127403455726933891513185653426.767 +1560872884 214939512 tfa rmh wrd 1955-09-28 15:05:46.897988000 1963-05-31 09:22:00.597388000 -10371879330867684414581450918534810.916 +-1396615316 1646988502 epk ito cwi 1991-01-06 01:08:26.577131000 1987-11-03 10:51:18.290933000 16511323103075921564774186406468253.458 +-1316004806 1646333132 lgr pkv oje 1945-12-26 13:06:21.588194000 1939-03-04 17:50:10.430653000 13732103960484033703885465840304436.477 +129348190 -1216781788 gbv nit sey 1967-12-05 21:16:30.077177000 1939-01-24 15:34:19.169731000 3827321890837346905750385640207228.295 +\N 215463808 qlg upk xje 1966-08-29 01:09:43.057534000 1963-06-26 10:52:34.771336000 -11840348272454425631692773903366250.048 +1466761752 214808438 hsn lwr kfq 1994-09-22 07:38:03.622423000 1987-11-29 12:21:52.464881000 -14609182742104708063625170161340751.956 +1547372262 1646595280 cwi jup fau 1980-02-18 15:49:47.504559000 1987-10-21 10:06:01.203959000 18010946380424307642205251259343751.221 +-1302373110 -1216257492 mhs qlw kfa 1942-04-24 07:22:01.629877000 1939-02-19 17:04:53.343679000 -18913987042484513432742496940525925.266 +35237058 -1216912862 toj xsn grm 1933-11-01 07:40:55.495979000 1963-07-22 12:23:08.945284000 20857666412158977223167579361886800.471 +115847568 215070586 xje vql bvh 1955-10-11 15:51:03.984961000 1963-06-13 10:07:17.684362000 -10330340322164434125305384742301437.212 +\N 214677364 ito mxs gbm 1991-01-19 01:53:43.664105000 1987-11-16 11:36:35.377907000 16552862111779171854050252582701627.162 +1561003958 1646464206 pkv toa sni 1946-01-08 13:51:38.675168000 1939-03-17 18:35:27.517627000 13773642969187283993161532016537810.181 +-1396484242 -1216650714 kfa rmx gbv 1967-12-18 22:01:47.164151000 1939-02-06 16:19:36.256705000 3868860899540597195026451816440601.999 +-1315873732 -1217043936 upk yto cni 1966-09-11 01:55:00.144508000 1963-07-09 11:37:51.858310000 -11798809263751175342416707727132876.344 +129479264 214939512 lwr pbv oju 1994-10-05 08:23:20.709397000 1987-12-12 13:07:09.551855000 -14567643733401457774349103985107378.252 +1466892826 1646726354 gbm nyt cwi 1980-03-02 16:35:04.591533000 1987-11-03 10:51:18.290933000 18052485389127557931481317435577124.925 +\N 1646333132 qlw upb oje 1942-05-07 08:07:18.716851000 1939-03-04 17:50:10.430653000 -18872448033781263143466430764292551.562 +1547503336 -1216781788 xsn cwr kvq 1933-11-14 08:26:12.582953000 1938-12-29 14:03:44.995783000 20899205420862227512443645538120174.175 +-1302242036 215201660 cni aup xje 1955-10-24 16:36:21.071935000 1963-06-26 10:52:34.771336000 -10288801313461183836029318566068063.508 +35368132 214808438 mxs qcw kfq 1991-02-01 02:39:00.751079000 1987-11-29 12:21:52.464881000 16594401120482422143326318758935000.866 +115978642 1646595280 toa xse wrm 1946-01-21 14:36:55.762142000 1987-09-25 08:35:27.030011000 13815181977890534282437598192771183.885 +1561135032 -1216519640 oje vqc kfa 1967-12-31 22:47:04.251125000 1939-02-19 17:04:53.343679000 
3910399908243847484302517992673975.703 +\N -1216912862 yto dxs grm 1966-09-24 02:40:17.231482000 1963-07-22 12:23:08.945284000 -11757270255047925053140641550899502.64 +-1396353168 215070586 pbv tfa sny 1994-10-18 09:08:37.796371000 1963-05-18 08:36:43.510414000 -14526104724698207485073037808874004.548 +-1315742658 1646857428 kfq rdx gbm 1980-03-15 17:20:21.678507000 1987-11-16 11:36:35.377907000 18094024397830808220757383611810498.629 +129610338 1646464206 upb ytf sni 1942-05-20 08:52:35.803825000 1939-03-17 18:35:27.517627000 -18830909025078012854190364588059177.858 +1467023900 -1216650714 cwr gbv oau 1933-11-27 09:11:29.669926000 1939-01-11 14:49:02.082757000 20940744429565477801719711714353547.879 +1547634410 215332734 grm eyt cni 1955-11-06 17:21:38.158909000 1963-07-09 11:37:51.858310000 -10247262304757933546753252389834689.804 +\N 214939512 qcw ugb oju 1991-02-14 03:24:17.838053000 1987-12-12 13:07:09.551855000 16635940129185672432602384935168374.57 +-1302110962 1646726354 xse cwi bvq 1946-02-03 15:22:12.849116000 1987-10-08 09:20:44.116985000 13856720986593784571713664369004557.589 +35499206 -1216388566 sni aug oje 1968-01-13 23:32:21.338099000 1939-03-04 17:50:10.430653000 3951938916947097773578584168907349.407 +116109716 -1216781788 dxs hcw kvq 1966-10-07 03:25:34.318456000 1938-12-29 14:03:44.995783000 -11715731246344674763864575374666128.936 +1561266106 215201660 tfa xje wrd 1994-10-31 09:53:54.883345000 1963-05-31 09:22:00.597388000 -14484565715994957195796971632640630.844 +-1396222094 1646988502 oju vhc kfq 1980-03-28 18:05:38.765481000 1987-11-29 12:21:52.464881000 18135563406534058510033449788043872.333 +\N 1646595280 ytf dxj wrm 1942-06-02 09:37:52.890799000 1987-09-25 08:35:27.030011000 -18789370016374762564914298411825804.154 +-1315611584 -1216519640 gbv kfa sey 1933-12-10 09:56:46.756900000 1939-01-24 15:34:19.169731000 20982283438268728090995777890586921.583 +129741412 215463808 kvq idx grm 1955-11-19 18:06:55.245883000 1963-07-22 12:23:08.945284000 -10205723296054683257477186213601316.1 +1467154974 215070586 ugb ykf sny 1991-02-27 04:09:34.925027000 1963-05-18 08:36:43.510414000 16677479137888922721878451111401748.274 +1547765484 1646857428 cwi gbm fau 1946-02-16 16:07:29.936090000 1987-10-21 10:06:01.203959000 13898259995297034860989730545237931.293 +-1301979888 -1216257492 wrm eyk sni 1968-01-27 00:17:38.425073000 1939-03-17 18:35:27.517627000 3993477925650348062854650345140723.111 +\N -1216650714 hcw lgb oau 1966-10-20 04:10:51.405430000 1939-01-11 14:49:02.082757000 -11674192237641424474588509198432755.232 +35630280 215332734 xje cni bvh 1994-11-13 10:39:11.970319000 1963-06-13 10:07:17.684362000 -14443026707291706906520905456407257.14 +116240790 214677364 sny alg oju 1980-04-10 18:50:55.852455000 1987-12-12 13:07:09.551855000 18177102415237308799309515964277246.037 +1561397180 1646726354 dxj hcn bvq 1942-06-15 10:23:09.977773000 1987-10-08 09:20:44.116985000 -18747831007671512275638232235592430.45 +-1396091020 -1216388566 kfa oje gbv 1933-12-23 10:42:03.843874000 1939-02-06 16:19:36.256705000 21023822446971978380271844066820295.287 +-1315480510 -1217043936 oau mhc kvq 1955-12-02 18:52:12.332857000 1938-12-29 14:03:44.995783000 -10164184287351432968201120037367942.396 +\N 215201660 ykf doj wrd 1991-03-12 04:54:52.012001000 1963-05-31 09:22:00.597388000 16719018146592173011154517287635121.978 +129872486 1646988502 gbm kfq cwi 1946-03-01 16:52:47.023064000 1987-11-03 10:51:18.290933000 13939799004000285150265796721471304.997 +1467286048 1646333132 bvq ido wrm 
1968-02-09 01:02:55.512047000 1987-09-25 08:35:27.030011000 4035016934353598352130716521374096.815 +1547896558 -1216519640 lgb pkf sey 1966-11-02 04:56:08.492404000 1939-01-24 15:34:19.169731000 -11632653228938174185312443022199381.528 +-1301848814 215463808 cni grm xje 1994-11-26 11:24:29.057293000 1963-06-26 10:52:34.771336000 -14401487698588456617244839280173883.436 +35761354 214808438 wrd epk sny 1980-04-23 19:36:12.939428000 1963-05-18 08:36:43.510414000 18218641423940559088585582140510619.741 +\N 1646857428 hcn lgr fau 1942-06-28 11:08:27.064746000 1987-10-21 10:06:01.203959000 -18706291998968261986362166059359056.746 +116371864 -1216257492 oje sni kfa 1934-01-05 11:27:20.930848000 1939-02-19 17:04:53.343679000 21065361455675228669547910243053668.991 +1561528254 -1216912862 sey qlg oau 1955-12-15 19:37:29.419831000 1939-01-11 14:49:02.082757000 -10122645278648182678925053861134568.692 +-1395959946 215332734 doj hsn bvh 1991-03-25 05:40:09.098975000 1963-06-13 10:07:17.684362000 16760557155295423300430583463868495.682 +-1315349436 214677364 kfq oju gbm 1946-03-14 17:38:04.110038000 1987-11-16 11:36:35.377907000 13981338012703535439541862897704678.701 +130003560 1646464206 fau mhs bvq 1968-02-22 01:48:12.599021000 1987-10-08 09:20:44.116985000 4076555943056848641406782697607470.519 +\N -1216388566 pkf toj gbv 1966-11-15 05:41:25.579378000 1939-02-06 16:19:36.256705000 -11591114220234923896036376845966007.824 +1467417122 -1217043936 grm kvq cni 1994-12-09 12:09:46.144266000 1963-07-09 11:37:51.858310000 -14359948689885206327968773103940509.732 +1548027632 214939512 bvh ito wrd 1980-05-06 20:21:30.026402000 1963-05-31 09:22:00.597388000 18260180432643809377861648316743993.445 +-1301717740 1646988502 lgr pkv cwi 1942-07-11 11:53:44.151720000 1987-11-03 10:51:18.290933000 -18664752990265011697086099883125683.042 +35892428 1646333132 sni wrm oje 1934-01-18 12:12:38.017822000 1939-03-04 17:50:10.430653000 21106900464378478958823976419287042.695 +116502938 -1216781788 wid upk sey 1955-12-28 20:22:46.506805000 1939-01-24 15:34:19.169731000 -10081106269944932389648987684901194.988 +\N 215463808 hsn lwr xje 1991-04-07 06:25:26.185949000 1963-06-26 10:52:34.771336000 16802096163998673589706649640101869.386 +1561659328 214808438 oju sny kfq 1946-03-27 18:23:21.197012000 1987-11-29 12:21:52.464881000 14022877021406785728817929073938052.405 +-1395828872 1646595280 jey qlw fau 1968-03-06 02:33:29.685995000 1987-10-21 10:06:01.203959000 4118094951760098930682848873840844.223 +-1315218362 -1216257492 toj xsn kfa 1966-11-28 06:26:42.666352000 1939-02-19 17:04:53.343679000 -11549575211531673606760310669732634.12 +130134634 -1216912862 kvq oau grm 1994-12-22 12:55:03.231240000 1963-07-22 12:23:08.945284000 -14318409681181956038692706927707136.028 +1467548196 215070586 fal mxs bvh 1980-05-19 21:06:47.113376000 1963-06-13 10:07:17.684362000 18301719441347059667137714492977367.149 +\N 214677364 pkv toa gbm 1942-07-24 12:39:01.238694000 1987-11-16 11:36:35.377907000 -18623213981561761407810033706892309.338 +\N 1646464206 wrm bvq sni 1934-01-31 12:57:55.104796000 1939-03-17 18:35:27.517627000 21148439473081729248100042595520416.399 +1548158706 -1216650714 bmh yto gbv 1956-01-10 21:08:03.593779000 1939-02-06 16:19:36.256705000 -10039567261241682100372921508667821.284 +-1301586666 -1217043936 lwr pbv cni 1991-04-20 07:10:43.272923000 1963-07-09 11:37:51.858310000 16843635172701923878982715816335243.09 +36023502 214939512 sny wrd oju 1946-04-09 19:08:38.283986000 1987-12-12 13:07:09.551855000 
14064416030110036018093995250171426.109 +116634012 1646726354 nid upb cwi 1968-03-19 03:18:46.772969000 1987-11-03 10:51:18.290933000 4159633960463349219958915050074217.927 +\N 1646333132 xsn cwr oje 1966-12-11 07:11:59.753326000 1939-03-04 17:50:10.430653000 -11508036202828423317484244493499260.416 +1561790402 -1216781788 oau sey kvq 1995-01-04 13:40:20.318214000 1938-12-29 14:03:44.995783000 -14276870672478705749416640751473762.324 +-1395697798 215201660 jep qcw xje 1980-06-01 21:52:04.200350000 1963-06-26 10:52:34.771336000 18343258450050309956413780669210740.853 +-1315087288 214808438 toa xse kfq 1942-08-06 13:24:18.325668000 1987-11-29 12:21:52.464881000 -18581674972858511118533967530658935.634 +130265708 1646595280 bvq fau wrm 1934-02-13 13:43:12.191770000 1987-09-25 08:35:27.030011000 21189978481784979537376108771753790.103 +1467679270 -1216519640 fql dxs kfa 1956-01-23 21:53:20.680753000 1939-02-19 17:04:53.343679000 -9998028252538431811096855332434447.58 +\N -1216912862 pbv tfa grm 1991-05-03 07:56:00.359897000 1963-07-22 12:23:08.945284000 16885174181405174168258781992568616.794 +1548289780 215070586 wrd bvh sny 1946-04-22 19:53:55.370960000 1963-05-18 08:36:43.510414000 14105955038813286307370061426404799.813 +-1301455592 1646857428 rmh ytf gbm 1968-04-01 04:04:03.859943000 1987-11-16 11:36:35.377907000 4201172969166599509234981226307591.631 +36154576 1646464206 cwr gbv sni 1966-12-24 07:57:16.840300000 1939-03-17 18:35:27.517627000 -11466497194125173028208178317265886.712 +116765086 -1216650714 sey wid oau 1995-01-17 14:25:37.405188000 1939-01-11 14:49:02.082757000 -14235331663775455460140574575240388.62 +1561921476 215332734 nit ugb cni 1980-06-14 22:37:21.287324000 1963-07-09 11:37:51.858310000 18384797458753560245689846845444114.557 +\N 214939512 xse cwi oju 1942-08-19 14:09:35.412642000 1987-12-12 13:07:09.551855000 -18540135964155260829257901354425561.93 +-1395566724 1646726354 fau jey bvq 1934-02-26 14:28:29.278744000 1987-10-08 09:20:44.116985000 21231517490488229826652174947987163.807 +-1314956214 -1216388566 jup hcw oje 1956-02-05 22:38:37.767727000 1939-03-04 17:50:10.430653000 -9956489243835181521820789156201073.876 +130396782 -1216781788 tfa xje kvq 1991-05-16 08:41:17.446871000 1938-12-29 14:03:44.995783000 16926713190108424457534848168801990.498 +1467810344 215201660 bvh fal wrd 1946-05-05 20:39:12.457934000 1963-05-31 09:22:00.597388000 14147494047516536596646127602638173.517 +1548420854 1646988502 vql dxj kfq 1968-04-14 04:49:20.946917000 1987-11-29 12:21:52.464881000 4242711977869849798511047402540965.335 +\N 1646595280 gbv kfa wrm 1967-01-06 08:42:33.927274000 1987-09-25 08:35:27.030011000 -11424958185421922738932112141032513.008 +-1301324518 -1216519640 wid bmh sey 1995-01-30 15:10:54.492162000 1939-01-24 15:34:19.169731000 -14193792655072205170864508399007014.916 +36285650 215463808 rmx ykf grm 1980-06-27 23:22:38.374298000 1963-07-22 12:23:08.945284000 18426336467456810534965913021677488.261 +116896160 215070586 cwi gbm sny 1942-09-01 14:54:52.499616000 1963-05-18 08:36:43.510414000 -18498596955452010539981835178192188.226 +1562052550 1646857428 jey nid fau 1934-03-11 15:13:46.365718000 1987-10-21 10:06:01.203959000 5408566632826149467328159735024.295 +-1395435650 -1216257492 nyt lgb sni 1956-02-18 23:23:54.854701000 1939-03-17 18:35:27.517627000 -9914950235131931232544722979967700.172 +\N -1216650714 xje cni oau 1991-05-29 09:26:34.533845000 1939-01-11 14:49:02.082757000 16968252198811674746810914345035364.202 +-1314825140 215332734 fal jep bvh 1946-05-18 
21:24:29.544908000 1963-06-13 10:07:17.684362000 14189033056219786885922193778871547.221 +130527856 214677364 aup hcn oju 1968-04-27 05:34:38.033891000 1987-12-12 13:07:09.551855000 4284250986573100087787113578774339.039 +1467941418 1646726354 kfa oje bvq 1967-01-19 09:27:51.014248000 1987-10-08 09:20:44.116985000 -11383419176718672449656045964799139.304 +1548551928 -1216388566 bmh fql gbv 1995-02-12 15:56:11.579136000 1939-02-06 16:19:36.256705000 -14152253646368954881588442222773641.212 +-1301193444 -1217043936 vqc doj kvq 1980-07-11 00:07:55.461272000 1938-12-29 14:03:44.995783000 18467875476160060824241979197910861.965 +\N 215201660 gbm kfq wrd 1942-09-14 15:40:09.586590000 1963-05-31 09:22:00.597388000 -18457057946748760250705769001958814.522 +36416724 1646988502 nid rmh cwi 1934-03-24 15:59:03.452692000 1987-11-03 10:51:18.290933000 46947575336076438743394335968397.999 +117027234 1646333132 rdx pkf wrm 1956-03-03 00:09:11.941675000 1987-09-25 08:35:27.030011000 -9873411226428680943268656803734326.468 +1562183624 -1216519640 cni grm sey 1991-06-11 10:11:51.620819000 1939-01-24 15:34:19.169731000 17009791207514925036086980521268737.906 +-1395304576 215463808 jep nit xje 1946-05-31 22:09:46.631882000 1963-06-26 10:52:34.771336000 14230572064923037175198259955104920.925 +-1314694066 214808438 eyt lgr sny 1968-05-10 06:19:55.120865000 1963-05-18 08:36:43.510414000 4325789995276350377063179755007712.743 +\N 1646857428 oje sni fau 1967-02-01 10:13:08.101222000 1987-10-21 10:06:01.203959000 -11341880168015422160379979788565765.6 +130658930 -1216257492 fql jup kfa 1995-02-25 16:41:28.666110000 1939-02-19 17:04:53.343679000 -14110714637665704592312376046540267.508 +1468072492 -1216912862 aug hsn oau 1980-07-24 00:53:12.548246000 1939-01-11 14:49:02.082757000 18509414484863311113518045374144235.669 +1548683002 215332734 kfq oju bvh 1942-09-27 16:25:26.673564000 1963-06-13 10:07:17.684362000 -18415518938045509961429702825725440.818 +-1301062370 214677364 rmh vql gbm 1934-04-06 16:44:20.539666000 1987-11-16 11:36:35.377907000 88486584039326728019460512201771.703 +36547798 1646464206 vhc toj bvq 1956-03-16 00:54:29.028649000 1987-10-08 09:20:44.116985000 -9831872217725430653992590627500952.764 +\N -1216388566 grm kvq gbv 1991-06-24 10:57:08.707793000 1939-02-06 16:19:36.256705000 17051330216218175325363046697502111.61 +117158308 -1217043936 nit rmx cni 1946-06-13 22:55:03.718856000 1963-07-09 11:37:51.858310000 14272111073626287464474326131338294.629 +1562314698 214939512 idx pkv wrd 1968-05-23 07:05:12.207839000 1963-05-31 09:22:00.597388000 4367329003979600666339245931241086.447 +-1395173502 1646988502 sni wrm cwi 1967-02-14 10:58:25.188196000 1987-11-03 10:51:18.290933000 -11300341159312171871103913612332391.896 +-1314562992 1646333132 jup nyt oje 1995-03-10 17:26:45.753084000 1939-03-04 17:50:10.430653000 -14069175628962454303036309870306893.804 +130790004 -1216781788 eyk lwr sey 1980-08-06 01:38:29.635220000 1939-01-24 15:34:19.169731000 18550953493566561402794111550377609.373 +\N 215463808 oju sny xje 1942-10-10 17:10:43.760538000 1963-06-26 10:52:34.771336000 -18373979929342259672153636649492067.114 +1468203566 214808438 vql aup kfq 1934-04-19 17:29:37.626640000 1987-11-29 12:21:52.464881000 130025592742577017295526688435145.407 +1548814076 1646595280 alg xsn fau 1956-03-29 01:39:46.115623000 1987-10-21 10:06:01.203959000 -9790333209022180364716524451267579.06 +-1300931296 -1216257492 kvq oau kfa 1991-07-07 11:42:25.794767000 1939-02-19 17:04:53.343679000 
17092869224921425614639112873735485.314 +36678872 -1216912862 rmx vqc grm 1946-06-26 23:40:20.805830000 1963-07-22 12:23:08.945284000 14313650082329537753750392307571668.333 +117289382 215070586 mhc toa bvh 1968-06-05 07:50:29.294813000 1963-06-13 10:07:17.684362000 4408868012682850955615312107474460.151 +\N 214677364 wrm bvq gbm 1967-02-27 11:43:42.275169000 1987-11-16 11:36:35.377907000 -11258802150608921581827847436099018.192 +1562445772 1646464206 nyt rdx sni 1995-03-23 18:12:02.840058000 1939-03-17 18:35:27.517627000 -14027636620259204013760243694073520.1 +-1395042428 -1216650714 ido pbv gbv 1980-08-19 02:23:46.722194000 1939-02-06 16:19:36.256705000 18592492502269811692070177726610983.077 +-1314431918 -1217043936 sny wrd cni 1942-10-23 17:56:00.847512000 1963-07-09 11:37:51.858310000 -18332440920639009382877570473258693.41 +130921078 214939512 aup eyt oju 1934-05-02 18:14:54.713614000 1987-12-12 13:07:09.551855000 171564601445827306571592864668519.111 +1468334640 1646726354 epk cwr cwi 1956-04-11 02:25:03.202597000 1987-11-03 10:51:18.290933000 -9748794200318930075440458275034205.356 +\N 1646333132 oau sey oje 1991-07-20 12:27:42.881741000 1939-03-04 17:50:10.430653000 17134408233624675903915179049968859.018 +1548945150 -1216781788 vqc aug kvq 1946-07-10 00:25:37.892804000 1938-12-29 14:03:44.995783000 14355189091032788043026458483805042.037 +-1300800222 215201660 qlg xse xje 1968-06-18 08:35:46.381786000 1963-06-26 10:52:34.771336000 4450407021386101244891378283707833.855 +36809946 214808438 bvq fau kfq 1967-03-12 12:28:59.362143000 1987-11-29 12:21:52.464881000 -11217263141905671292551781259865644.488 +117420456 1646595280 rdx vhc wrm 1995-04-05 18:57:19.927032000 1987-09-25 08:35:27.030011000 -13986097611555953724484177517840146.396 +1562576846 -1216519640 mhs tfa kfa 1980-09-01 03:09:03.809168000 1939-02-19 17:04:53.343679000 18634031510973061981346243902844356.781 +\N -1216912862 wrd bvh grm 1942-11-05 18:41:17.934486000 1963-07-22 12:23:08.945284000 -18290901911935759093601504297025319.706 +-1394911354 215070586 eyt idx sny 1934-05-15 19:00:11.800588000 1963-05-18 08:36:43.510414000 213103610149077595847659040901892.815 +-1314300844 1646857428 ito gbv gbm 1956-04-24 03:10:20.289571000 1987-11-16 11:36:35.377907000 -9707255191615679786164392098800831.652 +131052152 1646464206 sey wid sni 1991-08-02 13:12:59.968715000 1939-03-17 18:35:27.517627000 17175947242327926193191245226202232.722 +1468465714 -1216650714 aug eyk oau 1946-07-23 01:10:54.979777000 1939-01-11 14:49:02.082757000 14396728099736038332302524660038415.741 +1549076224 215332734 upk cwi cni 1968-07-01 09:21:03.468760000 1963-07-09 11:37:51.858310000 4491946030089351534167444459941207.559 +\N 214939512 fau jey oju \N 1987-12-12 13:07:09.551855000 -11175724133202421003275715083632270.784 +-1300669148 1646726354 vhc alg bvq 1967-03-25 13:14:16.449117000 1987-10-08 09:20:44.116985000 -13944558602852703435208111341606772.692 +36941020 -1216388566 qlw xje oje 1995-04-18 19:42:37.014006000 1939-03-04 17:50:10.430653000 18675570519676312270622310079077730.485 +117551530 -1216781788 bvh fal kvq 1980-09-14 03:54:20.896142000 1938-12-29 14:03:44.995783000 -18249362903232508804325438120791946.002 +1562707920 215201660 idx mhc wrd 1942-11-18 19:26:35.021460000 1963-05-31 09:22:00.597388000 254642618852327885123725217135266.519 +-1394780280 1646988502 mxs kfa kfq 1934-05-28 19:45:28.887562000 1987-11-29 12:21:52.464881000 -9665716182912429496888325922567457.948 +\N 1646595280 wid bmh wrm 1956-05-07 03:55:37.376545000 1987-09-25 
08:35:27.030011000 17217486251031176482467311402435606.426 +-1314169770 -1216519640 eyk ido sey 1991-08-15 13:58:17.055689000 1939-01-24 15:34:19.169731000 14438267108439288621578590836271789.445 +131183226 215463808 yto gbm grm 1946-08-05 01:56:12.066751000 1963-07-22 12:23:08.945284000 4533485038792601823443510636174581.263 +1468596788 215070586 jey nid sny 1968-07-14 10:06:20.555734000 1963-05-18 08:36:43.510414000 -11134185124499170713999648907398897.08 +1549207298 1646857428 alg epk fau 1967-04-07 13:59:33.536091000 1987-10-21 10:06:01.203959000 -13903019594149453145932045165373398.988 +-1300538074 -1216257492 upb cni sni 1995-05-01 20:27:54.100980000 1939-03-17 18:35:27.517627000 18717109528379562559898376255311104.189 +\N -1216650714 fal jep oau 1980-09-27 04:39:37.983116000 1939-01-11 14:49:02.082757000 -18207823894529258515049371944558572.298 +37072094 215332734 mhc qlg bvh 1942-12-01 20:11:52.108434000 1963-06-13 10:07:17.684362000 296181627555578174399791393368640.223 +117682604 214677364 qcw oje oju 1934-06-10 20:30:45.974536000 1987-12-12 13:07:09.551855000 -9624177174209179207612259746334084.244 +1562838994 1646726354 bmh fql bvq 1956-05-20 04:40:54.463519000 1987-10-08 09:20:44.116985000 17259025259734426771743377578668980.13 +-1394649206 -1216388566 ido mhs gbv 1991-08-28 14:43:34.142663000 1939-02-06 16:19:36.256705000 14479806117142538910854657012505163.149 +-1314038696 -1217043936 dxs kfq kvq 1946-08-18 02:41:29.153725000 1938-12-29 14:03:44.995783000 4575024047495852112719576812407954.967 +\N 215201660 nid rmh wrd 1968-07-27 10:51:37.642708000 1963-05-31 09:22:00.597388000 -11092646115795920424723582731165523.376 +131314300 1646988502 epk ito cwi 1967-04-20 14:44:50.623065000 1987-11-03 10:51:18.290933000 -13861480585446202856655978989140025.284 +1468727862 1646333132 ytf grm wrm 1995-05-14 21:13:11.187954000 1987-09-25 08:35:27.030011000 18758648537082812849174442431544477.893 +1549338372 -1216519640 jep nit sey 1980-10-10 05:24:55.070090000 1939-01-24 15:34:19.169731000 -18166284885826008225773305768325198.594 +-1300407000 215463808 qlg upk xje 1942-12-14 20:57:09.195408000 1963-06-26 10:52:34.771336000 337720636258828463675857569602013.927 +37203168 214808438 ugb sni sny 1934-06-23 21:16:03.061510000 1963-05-18 08:36:43.510414000 -9582638165505928918336193570100710.54 +\N 1646857428 fql jup fau 1956-06-02 05:26:11.550493000 1987-10-21 10:06:01.203959000 17300564268437677061019443754902353.834 +117813678 -1216257492 mhs qlw kfa 1991-09-10 15:28:51.229636000 1939-02-19 17:04:53.343679000 14521345125845789200130723188738536.853 +1562970068 -1216912862 hcw oju oau 1946-08-31 03:26:46.240699000 1939-01-11 14:49:02.082757000 4616563056199102401995642988641328.671 +-1394518132 215332734 rmh vql bvh 1968-08-09 11:36:54.729682000 1963-06-13 10:07:17.684362000 -11051107107092670135447516554932149.672 +-1313907622 214677364 ito mxs gbm 1967-05-03 15:30:07.710039000 1987-11-16 11:36:35.377907000 -13819941576742952567379912812906651.58 +131445374 1646464206 dxj kvq bvq 1995-05-27 21:58:28.274928000 1987-10-08 09:20:44.116985000 18800187545786063138450508607777851.597 +\N -1216388566 nit rmx gbv 1980-10-23 06:10:12.157064000 1939-02-06 16:19:36.256705000 -18124745877122757936497239592091824.89 +1468858936 -1217043936 upk yto cni 1942-12-27 21:42:26.282382000 1963-07-09 11:37:51.858310000 379259644962078752951923745835387.631 +1549469446 214939512 ykf wrm wrd 1934-07-06 22:01:20.148484000 1963-05-31 09:22:00.597388000 -9541099156802678629060127393867336.836 +-1300275926 1646988502 
jup nyt cwi 1956-06-15 06:11:28.637467000 1987-11-03 10:51:18.290933000 17342103277140927350295509931135727.538 +37334242 1646333132 qlw upb oje 1991-09-23 16:14:08.316610000 1939-03-04 17:50:10.430653000 14562884134549039489406789364971910.557 +117944752 -1216781788 lgb sny sey 1946-09-13 04:12:03.327673000 1939-01-24 15:34:19.169731000 4658102064902352691271709164874702.375 +\N 215463808 vql aup xje 1968-08-22 12:22:11.816656000 1963-06-26 10:52:34.771336000 -11009568098389419846171450378698775.968 +1563101142 214808438 mxs qcw kfq 1967-05-16 16:15:24.797013000 1987-11-29 12:21:52.464881000 -13778402568039702278103846636673277.876 +-1394387058 1646595280 hcn oau fau 1995-06-09 22:43:45.361902000 1987-10-21 10:06:01.203959000 18841726554489313427726574784011225.301 +-1313776548 -1216257492 rmx vqc kfa 1980-11-05 06:55:29.244038000 1939-02-19 17:04:53.343679000 -18083206868419507647221173415858451.186 +131576448 -1216912862 yto dxs grm 1943-01-09 22:27:43.369356000 1963-07-22 12:23:08.945284000 420798653665329042227989922068761.335 +1468990010 215070586 doj bvq bvh 1934-07-19 22:46:37.235458000 1963-06-13 10:07:17.684362000 -9499560148099428339784061217633963.132 +\N 214677364 nyt rdx gbm 1956-06-28 06:56:45.724441000 1987-11-16 11:36:35.377907000 17383642285844177639571576107369101.242 +1549600520 1646464206 upb ytf sni 1991-10-06 16:59:25.403584000 1939-03-17 18:35:27.517627000 14604423143252289778682855541205284.261 +-1300144852 -1216650714 pkf wrd gbv 1946-09-26 04:57:20.414647000 1939-02-06 16:19:36.256705000 4699641073605602980547775341108076.079 +37465316 -1217043936 aup eyt cni 1968-09-04 13:07:28.903630000 1963-07-09 11:37:51.858310000 -10968029089686169556895384202465402.264 +118075826 214939512 qcw ugb oju 1967-05-29 17:00:41.883987000 1987-12-12 13:07:09.551855000 -13736863559336451988827780460439904.172 +1563232216 1646726354 lgr sey cwi 1995-06-22 23:29:02.448876000 1987-11-03 10:51:18.290933000 18883265563192563717002640960244599.005 +\N 1646333132 vqc aug oje 1980-11-18 07:40:46.331012000 1939-03-04 17:50:10.430653000 -18041667859716257357945107239625077.482 +-1394255984 -1216781788 dxs hcw kvq 1943-01-22 23:13:00.456330000 1938-12-29 14:03:44.995783000 462337662368579331504056098302135.039 +-1313645474 215201660 hsn fau xje 1934-08-01 23:31:54.322432000 1963-06-26 10:52:34.771336000 -9458021139396178050507995041400589.428 +131707522 214808438 rdx vhc kfq 1956-07-11 07:42:02.811415000 1987-11-29 12:21:52.464881000 17425181294547427928847642283602474.946 +1469121084 1646595280 ytf dxj wrm 1991-10-19 17:44:42.490558000 1987-09-25 08:35:27.030011000 14645962151955540067958921717438657.965 +1549731594 -1216519640 toj bvh kfa 1946-10-09 05:42:37.501621000 1939-02-19 17:04:53.343679000 4741180082308853269823841517341449.783 +\N -1216912862 eyt idx grm 1968-09-17 13:52:45.990604000 1963-07-22 12:23:08.945284000 -10926490080982919267619318026232028.56 +-1300013778 215070586 ugb ykf sny 1967-06-11 17:45:58.970961000 1963-05-18 08:36:43.510414000 -13695324550633201699551714284206530.468 +37596390 1646857428 pkv wid gbm 1995-07-06 00:14:19.535850000 1987-11-16 11:36:35.377907000 18924804571895814006278707136477972.709 +118206900 1646464206 aug eyk sni 1980-12-01 08:26:03.417986000 1939-03-17 18:35:27.517627000 -18000128851013007068669041063391703.778 +1563363290 -1216650714 hcw lgb oau 1943-02-04 23:58:17.543304000 1939-01-11 14:49:02.082757000 503876671071829620780122274535508.743 +-1394124910 215332734 lwr jey cni 1934-08-15 00:17:11.409406000 1963-07-09 11:37:51.858310000 
-9416482130692927761231928865167215.724 +\N 214939512 vhc alg oju 1956-07-24 08:27:19.898389000 1987-12-12 13:07:09.551855000 17466720303250678218123708459835848.65 +-1313514400 1646726354 dxj hcn bvq 1991-11-01 18:29:59.577532000 1987-10-08 09:20:44.116985000 14687501160658790357234987893672031.669 +131838596 -1216388566 xsn fal oje 1946-10-22 06:27:54.588595000 1939-03-04 17:50:10.430653000 4782719091012103559099907693574823.487 +1469252158 -1216781788 \N mhc kvq 1968-09-30 14:38:03.077578000 1938-12-29 14:03:44.995783000 -10884951072279668978343251849998654.856 +1549862668 215201660 idx doj wrd 1967-06-24 18:31:16.057935000 1963-05-31 09:22:00.597388000 -13653785541929951410275648107973156.764 +-1299882704 1646988502 ykf bmh kfq 1995-07-19 00:59:36.622824000 1987-11-29 12:21:52.464881000 18966343580599064295554773312711346.413 +\N 1646595280 toa ido wrm 1980-12-14 09:11:20.504960000 1987-09-25 08:35:27.030011000 -17958589842309756779392974887158330.074 +37727464 -1216519640 eyk pkf sey 1943-02-18 00:43:34.630278000 1939-01-24 15:34:19.169731000 545415679775079910056188450768882.447 +118337974 215463808 lgb nid grm 1934-08-28 01:02:28.496380000 1963-07-22 12:23:08.945284000 -9374943121989677471955862688933842.02 +1563494364 215070586 pbv epk sny 1956-08-06 09:12:36.985362000 1963-05-18 08:36:43.510414000 17508259311953928507399774636069222.354 +-1393993836 1646857428 alg lgr fau 1991-11-14 19:15:16.664506000 1987-10-21 10:06:01.203959000 14729040169362040646511054069905405.373 +-1313383326 -1216257492 hcn jep sni 1946-11-04 07:13:11.675569000 1939-03-17 18:35:27.517627000 4824258099715353848375973869808197.191 +\N -1216650714 cwr qlg oau 1968-10-13 15:23:20.164552000 1939-01-11 14:49:02.082757000 -10843412063576418689067185673765281.152 +131969670 215332734 mhc hsn bvh 1967-07-07 19:16:33.144909000 1963-06-13 10:07:17.684362000 -13612246533226701120999581931739783.06 +1469383232 214677364 doj fql oju 1995-08-01 01:44:53.709798000 1987-12-12 13:07:09.551855000 19007882589302314584830839488944720.117 +1549993742 1646726354 xse mhs bvq 1980-12-27 09:56:37.591934000 \N -17917050833606506490116908710924956.37 +-1299751630 -1216388566 ido toj gbv 1943-03-03 01:28:51.717252000 1987-10-08 09:20:44.116985000 586954688478330199332254627002256.151 +37858538 -1217043936 pkf rmh kvq 1934-09-10 01:47:45.583353000 1939-02-06 16:19:36.256705000 -9333404113286427182679796512700468.316 +\N 215201660 tfa ito wrd 1956-08-19 09:57:54.072336000 1938-12-29 14:03:44.995783000 17549798320657178796675840812302596.058 +118469048 1646988502 epk pkv cwi 1991-11-27 20:00:33.751480000 1963-05-31 09:22:00.597388000 14770579178065290935787120246138779.077 +1563625438 1646333132 lgr nit wrm 1946-11-17 07:58:28.762543000 1987-11-03 10:51:18.290933000 4865797108418604137652040046041570.895 +-1393862762 -1216519640 gbv upk sey 1968-10-26 16:08:37.251526000 1987-09-25 08:35:27.030011000 -10801873054873168399791119497531907.448 +-1313252252 215463808 qlg lwr xje 1967-07-20 20:01:50.231883000 1939-01-24 15:34:19.169731000 -13570707524523450831723515755506409.356 +132100744 214808438 hsn jup sny 1995-08-14 02:30:10.796772000 1963-06-26 10:52:34.771336000 19049421598005564874106905665178093.821 +\N 1646857428 cwi qlw fau 1981-01-09 10:41:54.678908000 1963-05-18 08:36:43.510414000 -17875511824903256200840842534691582.666 +1469514306 -1216257492 mhs xsn kfa 1943-03-16 02:14:08.804226000 1987-10-21 10:06:01.203959000 628493697181580488608320803235629.855 +1550124816 -1216912862 toj vql oau 1934-09-23 02:33:02.670327000 1939-02-19 
17:04:53.343679000 -9291865104583176893403730336467094.612 +-1299620556 215332734 xje mxs bvh 1956-09-01 10:43:11.159310000 1939-01-11 14:49:02.082757000 17591337329360429085951906988535969.762 +37989612 214677364 ito toa gbm 1991-12-10 20:45:50.838454000 1963-06-13 10:07:17.684362000 14812118186768541225063186422372152.781 +118600122 1646464206 pkv rmx bvq 1946-11-30 08:43:45.849517000 1987-11-16 11:36:35.377907000 4907336117121854426928106222274944.599 +\N -1216388566 kfa yto gbv 1968-11-08 16:53:54.338500000 1987-10-08 09:20:44.116985000 -10760334046169918110515053321298533.744 +1563756512 -1217043936 upk pbv cni 1967-08-02 20:47:07.318857000 1939-02-06 16:19:36.256705000 -13529168515820200542447449579273035.652 +-1393731688 214939512 lwr nyt wrd 1995-08-27 03:15:27.883746000 1963-07-09 11:37:51.858310000 19090960606708815163382971841411467.525 +-1313121178 1646988502 gbm upb cwi 1981-01-22 11:27:11.765882000 1963-05-31 09:22:00.597388000 -17833972816200005911564776358458208.962 +132231818 1646333132 qlw cwr oje 1943-03-29 02:59:25.891200000 1987-11-03 10:51:18.290933000 670032705884830777884386979469003.559 +1469645380 -1216781788 xsn aup sey 1934-10-06 03:18:19.757301000 1939-03-04 17:50:10.430653000 -9250326095879926604127664160233720.908 +\N 215463808 cni qcw xje 1956-09-14 11:28:28.246284000 1939-01-24 15:34:19.169731000 17632876338063679375227973164769343.466 +1550255890 214808438 mxs xse kfq 1991-12-23 21:31:07.925428000 1963-06-26 10:52:34.771336000 14853657195471791514339252598605526.485 +-1299489482 1646595280 toa vqc fau \N 1987-11-29 12:21:52.464881000 4948875125825104716204172398508318.303 +38120686 -1216257492 oje dxs kfa 1946-12-13 09:29:02.936491000 1987-10-21 10:06:01.203959000 -10718795037466667821238987145065160.04 +118731196 -1216912862 yto tfa grm 1968-11-21 17:39:11.425474000 1939-02-19 17:04:53.343679000 -13487629507116950253171383403039661.948 +1563887586 215070586 pbv rdx bvh 1967-08-15 21:32:24.405831000 1963-07-22 12:23:08.945284000 19132499615412065452659038017644841.229 +\N 214677364 kfq ytf gbm 1995-09-09 04:00:44.970720000 1963-06-13 10:07:17.684362000 -17792433807496755622288710182224835.258 +-1393600614 1646464206 upb gbv sni 1981-02-04 12:12:28.852856000 1987-11-16 11:36:35.377907000 711571714588081067160453155702377.263 +-1312990104 -1216650714 cwr eyt gbv 1943-04-11 03:44:42.978174000 1939-03-17 18:35:27.517627000 -9208787087176676314851597984000347.204 +132362892 -1217043936 grm ugb cni 1934-10-19 04:03:36.844275000 1939-02-06 16:19:36.256705000 17674415346766929664504039341002717.17 +1469776454 214939512 qcw cwi oju 1956-09-27 12:13:45.333258000 1963-07-09 11:37:51.858310000 14895196204175041803615318774838900.189 +1550386964 1646726354 xse aug cwi 1992-01-05 22:16:25.012402000 1987-12-12 13:07:09.551855000 4990414134528355005480238574741692.007 +\N 1646333132 sni hcw oje 1946-12-26 10:14:20.023465000 1987-11-03 10:51:18.290933000 -10677256028763417531962920968831786.336 +-1299358408 -1216781788 dxs xje kvq 1968-12-04 18:24:28.512448000 1939-03-04 17:50:10.430653000 -13446090498413699963895317226806288.244 +38251760 215201660 tfa vhc xje 1967-08-28 22:17:41.492805000 1938-12-29 14:03:44.995783000 19174038624115315741935104193878214.933 +118862270 214808438 oju dxj kfq 1995-09-22 04:46:02.057694000 1963-06-26 10:52:34.771336000 -17750894798793505333012644005991461.554 +1564018660 1646595280 ytf kfa wrm 1981-02-17 12:57:45.939829000 1987-11-29 12:21:52.464881000 753110723291331356436519331935750.967 +-1393469540 -1216519640 gbv idx kfa 1943-04-24 
04:30:00.065147000 1987-09-25 08:35:27.030011000 -9167248078473426025575531807766973.5 +\N -1216912862 kvq ykf grm 1934-11-01 04:48:53.931249000 1939-02-19 17:04:53.343679000 17715954355470179953780105517236090.874 +-1312859030 215070586 ugb gbm sny 1956-10-10 12:59:02.420232000 1963-07-22 12:23:08.945284000 14936735212878292092891384951072273.893 +132493966 1646857428 cwi eyk gbm 1992-01-18 23:01:42.099376000 1963-05-18 08:36:43.510414000 5031953143231605294756304750975065.711 +1469907528 1646464206 wrm lgb sni 1947-01-08 10:59:37.110439000 1987-11-16 11:36:35.377907000 -10635717020060167242686854792598412.632 +1550518038 -1216650714 hcw cni oau 1968-12-17 19:09:45.599422000 1939-03-17 18:35:27.517627000 -13404551489710449674619251050572914.54 +-1299227334 215332734 xje alg cni 1967-09-10 23:02:58.579779000 1939-01-11 14:49:02.082757000 19215577632818566031211170370111588.637 +\N 214939512 sny hcn oju 1995-10-05 05:31:19.144667000 1963-07-09 11:37:51.858310000 -17709355790090255043736577829758087.85 +38382834 1646726354 dxj oje bvq 1981-03-02 13:43:03.026803000 1987-12-12 13:07:09.551855000 794649731994581645712585508169124.671 +118993344 -1216388566 kfa mhc oje 1943-05-07 05:15:17.152121000 1987-10-08 09:20:44.116985000 -9125709069770175736299465631533599.796 +1564149734 -1216781788 oau doj kvq 1934-11-14 05:34:11.018223000 1939-03-04 17:50:10.430653000 17757493364173430243056171693469464.578 +-1393338466 215201660 ykf kfq wrd 1956-10-23 13:44:19.507206000 1938-12-29 14:03:44.995783000 14978274221581542382167451127305647.597 +-1312727956 1646988502 gbm ido kfq 1992-01-31 23:46:59.186350000 1963-05-31 09:22:00.597388000 5073492151934855584032370927208439.415 +\N 1646595280 bvq pkf wrm 1947-01-21 11:44:54.197413000 1987-11-29 12:21:52.464881000 -10594178011356916953410788616365038.928 +132625040 -1216519640 lgb grm sey 1968-12-30 19:55:02.686396000 \N -13363012481007199385343184874339540.836 +1470038602 215463808 cni epk grm 1967-09-23 23:48:15.666753000 1987-09-25 08:35:27.030011000 19257116641521816320487236546344962.341 +1550649112 215070586 wrd lgr sny 1995-10-18 06:16:36.231641000 1939-01-24 15:34:19.169731000 -17667816781387004754460511653524714.146 +-1299096260 1646857428 hcn sni fau 1981-03-15 14:28:20.113777000 1963-07-22 12:23:08.945284000 836188740697831934988651684402498.375 +38513908 -1216257492 oje \N sni 1943-05-20 06:00:34.239095000 1963-05-18 08:36:43.510414000 -9084170061066925447023399455300226.092 +\N -1216650714 sey qlg oau 1934-11-27 06:19:28.105197000 1987-10-21 10:06:01.203959000 17799032372876680532332237869702838.282 +119124418 215332734 doj hsn bvh 1956-11-05 14:29:36.594180000 1939-03-17 18:35:27.517627000 15019813230284792671443517303539021.301 +1564280808 214677364 kfq oju oju 1992-02-14 00:32:16.273324000 1939-01-11 14:49:02.082757000 5115031160638105873308437103441813.119 +-1393207392 1646726354 fau mhs bvq 1947-02-03 12:30:11.284387000 1963-06-13 10:07:17.684362000 -10552639002653666664134722440131665.224 +-1312596882 -1216388566 pkf toj gbv 1969-01-12 20:40:19.773370000 1987-12-12 13:07:09.551855000 -13321473472303949096067118698106167.132 +132756114 -1217043936 grm kvq kvq 1967-10-07 00:33:32.753727000 1987-10-08 09:20:44.116985000 19298655650225066609763302722578336.045 +\N 215201660 bvh ito wrd 1995-10-31 07:01:53.318615000 1939-02-06 16:19:36.256705000 -17626277772683754465184445477291340.442 +1470169676 1646988502 lgr pkv cwi 1981-03-28 15:13:37.200751000 1938-12-29 14:03:44.995783000 877727749401082224264717860635872.079 +1550780186 1646333132 sni 
wrm wrm 1943-06-02 06:45:51.326069000 1963-05-31 09:22:00.597388000 -9042631052363675157747333279066852.388 +-1298965186 -1216519640 wid upk sey 1934-12-10 07:04:45.192171000 1987-11-03 10:51:18.290933000 17840571381579930821608304045936211.986 +38644982 215463808 hsn lwr xje 1956-11-18 15:14:53.681154000 1987-09-25 08:35:27.030011000 15061352238988042960719583479772395.005 +119255492 214808438 oju sny sny 1992-02-27 01:17:33.360298000 1939-01-24 15:34:19.169731000 5156570169341356162584503279675186.823 +\N 1646857428 jey qlw fau 1947-02-16 13:15:28.371361000 1963-06-26 10:52:34.771336000 -10511099993950416374858656263898291.52 +1564411882 -1216257492 toj xsn kfa 1969-01-25 21:25:36.860344000 1963-05-18 08:36:43.510414000 -13279934463600698806791052521872793.428 +-1393076318 -1216912862 kvq oau oau 1967-10-20 01:18:49.840701000 1987-10-21 10:06:01.203959000 19340194658928316899039368898811709.749 +-1312465808 215332734 fal mxs bvh 1995-11-13 07:47:10.405589000 1939-02-19 17:04:53.343679000 -17584738763980504175908379301057966.738 +132887188 214677364 pkv toa gbm 1981-04-10 15:58:54.287725000 1939-01-11 14:49:02.082757000 919266758104332513540784036869245.783 +1470300750 1646464206 wrm bvq bvq 1943-06-15 07:31:08.413043000 1963-06-13 10:07:17.684362000 -9001092043660424868471267102833478.684 +\N -1216388566 bmh yto gbv 1934-12-23 07:50:02.279145000 1987-11-16 11:36:35.377907000 17882110390283181110884370222169585.69 +1550911260 -1217043936 lwr pbv cni 1956-12-01 16:00:10.768128000 1987-10-08 09:20:44.116985000 15102891247691293249995649656005768.709 +-1298834112 214939512 sny wrd wrd 1992-03-11 02:02:50.447272000 1939-02-06 16:19:36.256705000 5198109178044606451860569455908560.527 +38776056 1646988502 nid upb cwi 1947-03-01 14:00:45.458335000 1963-07-09 11:37:51.858310000 -10469560985247166085582590087664917.816 +119386566 1646333132 xsn cwr oje 1969-02-07 22:10:53.947318000 1963-05-31 09:22:00.597388000 -13238395454897448517514986345639419.724 +1564542956 -1216781788 oau sey sey 1967-11-02 02:04:06.927675000 1987-11-03 10:51:18.290933000 19381733667631567188315435075045083.453 +\N 215463808 jep qcw xje 1995-11-26 08:32:27.492563000 1939-03-04 17:50:10.430653000 -17543199755277253886632313124824593.034 +-1392945244 214808438 toa xse kfq 1981-04-23 16:44:11.374699000 1939-01-24 15:34:19.169731000 960805766807582802816850213102619.487 +-1312334734 1646595280 bvq fau fau 1943-06-28 08:16:25.500017000 1963-06-26 10:52:34.771336000 -8959553034957174579195200926600104.98 +133018262 -1216257492 fql dxs kfa 1935-01-05 08:35:19.366119000 1987-11-29 12:21:52.464881000 17923649398986431400160436398402959.394 +1470431824 -1216912862 pbv tfa grm 1956-12-14 16:45:27.855102000 1987-10-21 10:06:01.203959000 15144430256394543539271715832239142.413 +1551042334 215070586 wrd bvh bvh 1992-03-24 02:48:07.534246000 1939-02-19 17:04:53.343679000 5239648186747856741136635632141934.231 +\N 214677364 rmh ytf gbm 1947-03-14 14:46:02.545309000 1963-07-22 12:23:08.945284000 -10428021976543915796306523911431544.112 +-1298703038 1646464206 cwr gbv sni 1969-02-20 22:56:11.034292000 1963-06-13 10:07:17.684362000 -13196856446194198228238920169406046.02 +38907130 -1216650714 sey wid gbv 1967-11-15 02:49:24.014649000 1987-11-16 11:36:35.377907000 19423272676334817477591501251278457.157 +119517640 -1217043936 nit ugb cni 1995-12-09 09:17:44.579537000 1939-03-17 18:35:27.517627000 -17501660746574003597356246948591219.33 +1564674030 214939512 xse cwi oju 1981-05-06 17:29:28.461673000 1939-02-06 16:19:36.256705000 
1002344775510833092092916389335993.191 +-1392814170 1646726354 fau jey cwi 1943-07-11 09:01:42.586991000 1963-07-09 11:37:51.858310000 -8918014026253924289919134750366731.276 +\N 1646333132 jup hcw oje 1935-01-18 09:20:36.453093000 1987-12-12 13:07:09.551855000 17965188407689681689436502574636333.098 +-1312203660 -1216781788 tfa xje kvq 1956-12-27 17:30:44.942076000 1987-11-03 10:51:18.290933000 15185969265097793828547782008472516.117 +133149336 215201660 bvh fal xje 1992-04-06 03:33:24.621220000 1939-03-04 17:50:10.430653000 5281187195451107030412701808375307.935 +1470562898 214808438 vql dxj kfq 1947-03-27 15:31:19.632283000 1938-12-29 14:03:44.995783000 -10386482967840665507030457735198170.408 +1551173408 1646595280 gbv kfa wrm 1969-03-05 23:41:28.121266000 1963-06-26 10:52:34.771336000 -13155317437490947938962853993172672.316 +-1298571964 -1216519640 wid bmh kfa 1967-11-28 03:34:41.101623000 1987-11-29 12:21:52.464881000 19464811685038067766867567427511830.861 +\N -1216912862 rmx ykf grm 1995-12-22 10:03:01.666511000 1987-09-25 08:35:27.030011000 -17460121737870753308080180772357845.626 +39038204 215070586 cwi gbm sny 1981-05-19 18:14:45.548647000 1939-02-19 17:04:53.343679000 1043883784214083381368982565569366.895 +119648714 1646857428 jey nid gbm 1943-07-24 09:46:59.673965000 1963-07-22 12:23:08.945284000 -8876475017550674000643068574133357.572 +1564805104 1646464206 nyt lgb sni 1935-01-31 10:05:53.540067000 1963-05-18 08:36:43.510414000 18006727416392931978712568750869706.802 +-1392683096 -1216650714 xje cni oau 1957-01-09 18:16:02.029050000 1987-11-16 11:36:35.377907000 15227508273801044117823848184705889.821 +-1312072586 215332734 fal jep cni 1992-04-19 04:18:41.708194000 1939-03-17 18:35:27.517627000 5322726204154357319688767984608681.639 +\N 214939512 aup hcn oju 1947-04-09 16:16:36.719257000 1939-01-11 14:49:02.082757000 -10344943959137415217754391558964796.704 +133280410 1646726354 kfa oje bvq 1969-03-19 00:26:45.208240000 1963-07-09 11:37:51.858310000 -13113778428787697649686787816939298.612 +1470693972 -1216388566 bmh fql oje 1967-12-11 04:19:58.188597000 1987-12-12 13:07:09.551855000 19506350693741318056143633603745204.565 +1551304482 -1216781788 vqc doj kvq 1996-01-04 10:48:18.753485000 1987-10-08 09:20:44.116985000 -17418582729167503018804114596124471.922 +-1298440890 215201660 gbm kfq wrd 1981-06-01 19:00:02.635621000 1939-03-04 17:50:10.430653000 1085422792917333670645048741802740.599 +39169278 1646988502 nid rmh kfq 1943-08-06 10:32:16.760939000 1938-12-29 14:03:44.995783000 -8834936008847423711367002397899983.868 +\N 1646595280 rdx pkf wrm 1935-02-13 10:51:10.627041000 1963-05-31 09:22:00.597388000 18048266425096182267988634927103080.506 +119779788 -1216519640 cni grm sey 1957-01-22 19:01:19.116024000 1987-11-29 12:21:52.464881000 15269047282504294407099914360939263.525 +1564936178 215463808 jep nit grm 1992-05-02 05:03:58.795168000 1987-09-25 08:35:27.030011000 5364265212857607608964834160842055.343 +-1392552022 215070586 eyt lgr sny 1947-04-22 17:01:53.806231000 1939-01-24 15:34:19.169731000 -10303404950434164928478325382731423 +-1311941512 1646857428 oje sni fau 1969-04-01 01:12:02.295213000 1963-07-22 12:23:08.945284000 -13072239420084447360410721640705924.908 +133411484 -1216257492 fql jup sni 1967-12-24 05:05:15.275570000 1963-05-18 08:36:43.510414000 19547889702444568345419699779978578.269 +\N -1216650714 aug hsn oau 1996-01-17 11:33:35.840459000 1987-10-21 10:06:01.203959000 -17377043720464252729528048419891098.218 +1470825046 215332734 kfq oju bvh 1981-06-14 
19:45:19.722595000 1939-03-17 18:35:27.517627000 1126961801620583959921114918036114.303 +1551435556 214677364 rmh vql oju 1943-08-19 11:17:33.847913000 1939-01-11 14:49:02.082757000 -8793397000144173422090936221666610.164 +-1298309816 1646726354 vhc toj bvq 1935-02-26 11:36:27.714015000 1963-06-13 10:07:17.684362000 18089805433799432557264701103336454.21 +39300352 -1216388566 grm kvq gbv 1957-02-04 19:46:36.202998000 1987-12-12 13:07:09.551855000 15310586291207544696375980537172637.229 +119910862 -1217043936 nit rmx kvq 1992-05-15 05:49:15.882142000 1987-10-08 09:20:44.116985000 5405804221560857898240900337075429.047 +\N 215201660 idx pkv wrd 1947-05-05 17:47:10.893205000 1939-02-06 16:19:36.256705000 -10261865941730914639202259206498049.296 +1565067252 1646988502 sni wrm cwi 1969-04-14 01:57:19.382187000 1938-12-29 14:03:44.995783000 -13030700411381197071134655464472551.204 +-1392420948 1646333132 jup nyt wrm 1968-01-06 05:50:32.362544000 1963-05-31 09:22:00.597388000 19589428711147818634695765956211951.973 +-1311810438 -1216519640 eyk lwr sey 1996-01-30 12:18:52.927433000 1987-11-03 10:51:18.290933000 -17335504711761002440251982243657724.514 +133542558 215463808 oju sny xje 1981-06-27 20:30:36.809569000 1987-09-25 08:35:27.030011000 1168500810323834249197181094269488.007 +1470956120 214808438 vql aup sny 1943-09-01 12:02:50.934887000 1939-01-24 15:34:19.169731000 -8751857991440923132814870045433236.46 +\N 1646857428 alg xsn fau 1935-03-11 12:21:44.800989000 1963-06-26 10:52:34.771336000 18131344442502682846540767279569827.914 +1551566630 -1216257492 kvq oau kfa 1957-02-17 20:31:53.289972000 1963-05-18 08:36:43.510414000 15352125299910794985652046713406010.933 +-1298178742 -1216912862 rmx vqc oau 1992-05-28 06:34:32.969116000 1987-10-21 10:06:01.203959000 5447343230264108187516966513308802.751 +39431426 215332734 mhc toa bvh 1947-05-18 18:32:27.980178000 1939-02-19 17:04:53.343679000 -10220326933027664349926193030264675.592 +120041936 214677364 wrm bvq gbm 1969-04-27 02:42:36.469161000 1939-01-11 14:49:02.082757000 -12989161402677946781858589288239177.5 +1565198326 1646464206 nyt rdx bvq 1968-01-19 06:35:49.449518000 1963-06-13 10:07:17.684362000 19630967719851068923971832132445325.677 +\N -1216388566 ido pbv gbv 1996-02-12 13:04:10.014407000 1987-11-16 11:36:35.377907000 -17293965703057752150975916067424350.81 +-1392289874 -1217043936 sny wrd cni 1981-07-10 21:15:53.896543000 1987-10-08 09:20:44.116985000 1210039819027084538473247270502861.711 +-1311679364 214939512 aup eyt wrd 1943-09-14 12:48:08.021861000 1939-02-06 16:19:36.256705000 -8710318982737672843538803869199862.756 +133673632 1646988502 epk cwr cwi 1935-03-24 13:07:01.887963000 1963-07-09 11:37:51.858310000 18172883451205933135816833455803201.618 +1471087194 1646333132 oau sey oje 1957-03-02 21:17:10.376946000 1963-05-31 09:22:00.597388000 15393664308614045274928112889639384.637 +1551697704 -1216781788 vqc aug sey 1992-06-10 07:19:50.056090000 1987-11-03 10:51:18.290933000 -14457305820759193856084943006068793.4 +\N 215463808 qlg xse xje 1947-05-31 19:17:45.067152000 1939-03-04 17:50:10.430653000 -10178787924324414060650126854031301.888 +-1298047668 214808438 bvq fau kfq 1969-05-10 03:27:53.556135000 1939-01-24 15:34:19.169731000 -12947622393974696492582523112005803.796 +39562500 1646595280 rdx vhc fau 1968-02-01 07:21:06.536492000 1963-06-26 10:52:34.771336000 -263296658230627690673753078742955.402 +120173010 -1216257492 mhs tfa kfa 1996-02-25 13:49:27.101381000 1987-11-29 12:21:52.464881000 
-17252426694354501861699849891190977.106 +1565329400 -1216912862 wrd bvh grm 1981-07-23 22:01:10.983517000 1987-10-21 10:06:01.203959000 1251578827730334827749313446736235.415 +-1392158800 215070586 eyt idx bvh 1943-09-27 13:33:25.108835000 1939-02-19 17:04:53.343679000 13935904563474403629658083479999083.81 +\N 214677364 ito gbv gbm 1935-04-06 13:52:18.974937000 1963-07-22 12:23:08.945284000 18214422459909183425092899632036575.322 +-1311548290 1646464206 sey wid sni 1957-03-15 22:02:27.463920000 1963-06-13 10:07:17.684362000 15435203317317295564204179065872758.341 +133804706 -1216650714 aug eyk gbv 1992-06-23 08:05:07.143064000 1987-11-16 11:36:35.377907000 -14415766812055943566808876829835419.696 +1471218268 -1217043936 upk cwi cni 1947-06-13 20:03:02.154126000 1939-03-17 18:35:27.517627000 -10137248915621163771374060677797928.184 +1551828778 214939512 fau jey oju 1969-05-23 04:13:10.643109000 1939-02-06 16:19:36.256705000 -12906083385271446203306456935772430.092 +-1297916594 1646726354 vhc alg cwi 1968-02-14 08:06:23.623466000 1963-07-09 11:37:51.858310000 -221757649527377401397686902509581.698 +\N 1646333132 qlw xje oje 1996-03-09 14:34:44.188355000 1987-12-12 13:07:09.551855000 -17210887685651251572423783714957603.402 +39693574 -1216781788 bvh fal kvq 1981-08-05 22:46:28.070491000 1987-11-03 10:51:18.290933000 1293117836433585117025379622969609.119 +120304084 215201660 idx mhc xje 1943-10-10 14:18:42.195809000 1939-03-04 17:50:10.430653000 13977443572177653918934149656232457.514 +1565460474 214808438 mxs kfa kfq 1935-04-19 14:37:36.061911000 1938-12-29 14:03:44.995783000 18255961468612433714368965808269949.026 +-1392027726 1646595280 wid bmh wrm 1957-03-28 22:47:44.550894000 1963-06-26 10:52:34.771336000 15476742326020545853480245242106132.045 +-1311417216 -1216519640 eyk ido kfa 1992-07-06 08:50:24.230037000 1987-11-29 12:21:52.464881000 -14374227803352693277532810653602045.992 +\N -1216912862 yto gbm grm 1947-06-26 20:48:19.241100000 1987-09-25 08:35:27.030011000 -10095709906917913482097994501564554.48 +133935780 215070586 jey nid sny 1969-06-05 04:58:27.730083000 1939-02-19 17:04:53.343679000 -12864544376568195914030390759539056.388 +1471349342 1646857428 alg epk \N 1968-02-27 08:51:40.710440000 1963-07-22 12:23:08.945284000 -180218640824127112121620726276207.994 +1551959852 1646464206 upb cni gbm 1996-03-22 15:20:01.275329000 1963-05-18 08:36:43.510414000 -17169348676948001283147717538724229.698 +-1297785520 -1216650714 fal jep sni 1981-08-18 23:31:45.157465000 1987-11-16 11:36:35.377907000 1334656845136835406301445799202982.823 +39824648 215332734 mhc qlg oau 1943-10-23 15:03:59.282783000 1939-03-17 18:35:27.517627000 14018982580880904208210215832465831.218 +\N 214939512 qcw oje cni 1935-05-02 15:22:53.148885000 1939-01-11 14:49:02.082757000 18297500477315684003645031984503322.73 +120435158 1646726354 bmh fql oju 1957-04-10 23:33:01.637868000 1963-07-09 11:37:51.858310000 15518281334723796142756311418339505.749 +1565591548 -1216388566 ido mhs bvq 1992-07-19 09:35:41.317011000 1987-12-12 13:07:09.551855000 -14332688794649442988256744477368672.288 +-1391896652 -1216781788 dxs kfq oje 1947-07-09 21:33:36.328074000 1987-10-08 09:20:44.116985000 -10054170898214663192821928325331180.776 +-1311286142 215201660 nid rmh kvq 1969-06-18 05:43:44.817057000 1939-03-04 17:50:10.430653000 -12823005367864945624754324583305682.684 +134066854 1646988502 epk ito wrd 1968-03-11 09:36:57.797414000 1938-12-29 14:03:44.995783000 -138679632120876822845554550042834.29 +\N 1646595280 ytf grm kfq 
1996-04-04 16:05:18.362303000 1963-05-31 09:22:00.597388000 -17127809668244750993871651362490855.994 +1471480416 -1216519640 jep nit wrm 1981-09-01 00:17:02.244439000 1987-11-29 12:21:52.464881000 1376195853840085695577511975436356.527 +1552090926 215463808 qlg upk sey 1943-11-05 15:49:16.369757000 1987-09-25 08:35:27.030011000 14060521589584154497486282008699204.922 +-1297654446 215070586 ugb sni grm 1935-05-15 16:08:10.235859000 1939-01-24 15:34:19.169731000 18339039486018934292921098160736696.434 +39955722 1646857428 fql jup sny 1957-04-24 00:18:18.724842000 1963-07-22 12:23:08.945284000 15559820343427046432032377594572879.453 +120566232 -1216257492 mhs qlw fau 1992-08-01 10:20:58.403985000 1963-05-18 08:36:43.510414000 -14291149785946192698980678301135298.584 +\N -1216650714 hcw oju sni 1947-07-22 22:18:53.415048000 1987-10-21 10:06:01.203959000 -10012631889511412903545862149097807.072 +1565722622 215332734 rmh vql oau 1969-07-01 06:29:01.904031000 1939-03-17 18:35:27.517627000 -12781466359161695335478258407072308.98 +-1391765578 214677364 ito mxs bvh 1968-03-24 10:22:14.884388000 1939-01-11 14:49:02.082757000 -97140623417626533569488373809460.586 +-1311155068 1646726354 dxj kvq oju 1996-04-17 16:50:35.449277000 1963-06-13 10:07:17.684362000 -17086270659541500704595585186257482.29 +134197928 -1216388566 nit rmx bvq 1981-09-14 01:02:19.331413000 1987-12-12 13:07:09.551855000 1417734862543335984853578151669730.231 +1471611490 -1217043936 upk yto gbv 1943-11-18 16:34:33.456731000 1987-10-08 09:20:44.116985000 14102060598287404786762348184932578.626 +\N 215201660 ykf wrm kvq 1935-05-28 16:53:27.322833000 1939-02-06 16:19:36.256705000 18380578494722184582197164336970070.138 +1552222000 1646988502 jup nyt wrd 1957-05-07 01:03:35.811816000 1938-12-29 14:03:44.995783000 15601359352130296721308443770806253.157 +-1297523372 1646333132 qlw upb cwi 1992-08-14 11:06:15.490959000 1963-05-31 09:22:00.597388000 -14249610777242942409704612124901924.88 +40086796 -1216519640 lgb sny wrm 1947-08-04 23:04:10.502022000 1987-11-03 10:51:18.290933000 -9971092880808162614269795972864433.368 +120697306 215463808 vql aup sey 1969-07-14 07:14:18.991005000 1987-09-25 08:35:27.030011000 -12739927350458445046202192230838935.276 +1565853696 214808438 mxs qcw xje 1968-04-06 11:07:31.971362000 1939-01-24 15:34:19.169731000 -55601614714376244293422197576086.882 +\N 1646857428 hcn oau sny 1996-04-30 17:35:52.536251000 1963-06-26 10:52:34.771336000 -17044731650838250415319519010024108.586 +-1391634504 -1216257492 rmx vqc fau 1981-09-27 01:47:36.418387000 1963-05-18 08:36:43.510414000 1459273871246586274129644327903103.935 +-1311023994 -1216912862 yto dxs kfa 1943-12-01 17:19:50.543705000 1987-10-21 10:06:01.203959000 14143599606990655076038414361165952.33 +134329002 215332734 doj bvq oau 1935-06-10 17:38:44.409807000 1939-02-19 17:04:53.343679000 18422117503425434871473230513203443.842 +1471742564 214677364 nyt rdx bvh 1957-05-20 01:48:52.898790000 1939-01-11 14:49:02.082757000 15642898360833547010584509947039626.861 +1552353074 1646464206 upb ytf gbm 1992-08-27 11:51:32.577933000 1963-06-13 10:07:17.684362000 -14208071768539692120428545948668551.176 +\N -1216388566 pkf wrd bvq 1947-08-17 23:49:27.588996000 1987-11-16 11:36:35.377907000 -9929553872104912324993729796631059.664 +-1297392298 -1217043936 aup eyt gbv 1969-07-27 07:59:36.077979000 1987-10-08 09:20:44.116985000 -12698388341755194756926126054605561.572 +40217870 214939512 qcw ugb cni 1968-04-19 11:52:49.058336000 1939-02-06 16:19:36.256705000 
-14062606011125955017356021342713.178 +120828380 1646988502 lgr sey wrd 1996-05-13 18:21:09.623225000 1963-07-09 11:37:51.858310000 -17003192642135000126043452833790734.882 +1565984770 1646333132 vqc aug cwi 1981-10-10 02:32:53.505361000 1963-05-31 09:22:00.597388000 1500812879949836563405710504136477.639 +-1391503430 -1216781788 dxs hcw oje 1943-12-14 18:05:07.630679000 1987-11-03 10:51:18.290933000 14185138615693905365314480537399326.034 +\N 215463808 hsn fau sey 1935-06-23 18:24:01.496781000 1939-03-04 17:50:10.430653000 18463656512128685160749296689436817.546 +-1310892920 214808438 rdx vhc xje 1957-06-02 02:34:09.985763000 1939-01-24 15:34:19.169731000 15684437369536797299860576123273000.565 +134460076 1646595280 ytf dxj kfq 1992-09-09 12:36:49.664907000 1963-06-26 10:52:34.771336000 -14166532759836441831152479772435177.472 +1471873638 -1216257492 toj bvh fau 1947-08-31 00:34:44.675970000 1987-11-29 12:21:52.464881000 -9888014863401662035717663620397685.96 +1552484148 -1216912862 eyt idx kfa 1946-05-24 04:27:57.656327000 1987-10-21 10:06:01.203959000 -12656849333051944467650059878372187.868 +-1297261224 215070586 ugb ykf grm 1968-05-02 12:38:06.145310000 1939-02-19 17:04:53.343679000 -21240171529866529632202202809594852.69 +\N 214677364 pkv wid bvh 1996-05-26 19:06:26.710199000 1963-07-22 12:23:08.945284000 -16961653633431749836767386657557361.178 +40348944 1646464206 aug eyk gbm 1958-07-31 10:38:40.835517000 1963-06-13 10:07:17.684362000 1542351888653086852681776680369851.343 +120959454 -1216650714 hcw lgb sni 1943-12-27 18:50:24.717653000 1987-11-16 11:36:35.377907000 14226677624397155654590546713632699.738 +1566115844 -1217043936 lwr jey gbv 1935-07-06 19:09:18.583754000 1939-03-17 18:35:27.517627000 18505195520831935450025362865670191.25 +-1391372356 214939512 vhc alg cni 1970-10-14 05:11:58.262898000 1939-02-06 16:19:36.256705000 15725976378240047589136642299506374.269 +-1310761846 1646726354 dxj hcn oju 1992-09-22 13:22:06.751881000 1963-07-09 11:37:51.858310000 -14124993751133191541876413596201803.768 +\N 1646333132 xsn fal cwi 1947-09-13 01:20:01.762944000 1987-12-12 13:07:09.551855000 -9846475854698411746441597444164312.256 +134591150 -1216781788 idx mhc oje 1946-06-06 05:13:14.743301000 1987-11-03 10:51:18.290933000 -12615310324348694178373993702138814.164 +1472004712 215201660 ykf doj kvq 1968-05-15 13:23:23.232284000 1939-03-04 17:50:10.430653000 -21198632521163279342926136633361478.986 +1552615222 214808438 toa bmh xje 1996-06-08 19:51:43.797173000 1938-12-29 14:03:44.995783000 -16920114624728499547491320481323987.474 +-1297130150 1646595280 eyk ido kfq 1958-08-13 11:23:57.922491000 1963-06-26 10:52:34.771336000 1583890897356337141957842856603225.047 +40480018 -1216519640 lgb pkf wrm 1944-01-09 19:35:41.804627000 1987-11-29 12:21:52.464881000 14268216633100405943866612889866073.442 +\N -1216912862 pbv nid kfa 1935-07-19 19:54:35.670728000 1987-09-25 08:35:27.030011000 18546734529535185739301429041903564.954 +121090528 215070586 alg epk grm 1970-10-27 05:57:15.349872000 1939-02-19 17:04:53.343679000 15767515386943297878412708475739747.973 +1566246918 1646857428 hcn lgr sny 1992-10-05 14:07:23.838855000 1963-07-22 12:23:08.945284000 -14083454742429941252600347419968430.064 +-1391241282 1646464206 cwr jep gbm 1947-09-26 02:05:18.849918000 1963-05-18 08:36:43.510414000 -9804936845995161457165531267930938.552 +-1310630772 -1216650714 mhc qlg sni 1946-06-19 05:58:31.830275000 1987-11-16 11:36:35.377907000 -12573771315645443889097927525905440.46 +134722224 215332734 doj hsn oau 
1968-05-28 14:08:40.319258000 1939-03-17 18:35:27.517627000 -21157093512460029053650070457128105.282 +\N 214939512 xse fql cni 1996-06-21 20:37:00.884147000 1939-01-11 14:49:02.082757000 -16878575616025249258215254305090613.77 +1472135786 1646726354 ido mhs oju 1958-08-26 12:09:15.009465000 1963-07-09 11:37:51.858310000 1625429906059587431233909032836598.751 +1552746296 -1216388566 pkf toj bvq 1944-01-22 20:20:58.891601000 1987-12-12 13:07:09.551855000 14309755641803656233142679066099447.146 +-1296999076 -1216781788 tfa rmh oje 1935-08-01 20:39:52.757702000 1987-10-08 09:20:44.116985000 18588273538238436028577495218136938.658 +40611092 215201660 epk ito kvq 1970-11-09 06:42:32.436846000 1939-03-04 17:50:10.430653000 15809054395646548167688774651973121.677 +121221602 1646988502 lgr pkv wrd 1992-10-18 14:52:40.925829000 1938-12-29 14:03:44.995783000 -14041915733726690963324281243735056.36 +\N 1646595280 gbv nit kfq 1947-10-09 02:50:35.936892000 1963-05-31 09:22:00.597388000 -9763397837291911167889465091697564.848 +1566377992 -1216519640 qlg upk wrm 1946-07-02 06:43:48.917249000 1987-11-29 12:21:52.464881000 -12532232306942193599821861349672066.756 +-1391110208 215463808 hsn lwr sey 1968-06-10 14:53:57.406232000 1987-09-25 08:35:27.030011000 -21115554503756778764374004280894731.578 +-1310499698 215070586 cwi jup grm 1996-07-04 21:22:17.971121000 1939-01-24 15:34:19.169731000 -16837036607321998968939188128857240.066 +134853298 1646857428 mhs qlw sny 1958-09-08 12:54:32.096439000 1963-07-22 12:23:08.945284000 1666968914762837720509975209069972.455 +1472266860 -1216257492 toj xsn fau 1944-02-04 21:06:15.978575000 1963-05-18 08:36:43.510414000 14351294650506906522418745242332820.85 +\N -1216650714 xje vql sni 1935-08-14 21:25:09.844676000 1987-10-21 10:06:01.203959000 18629812546941686317853561394370312.362 +1552877370 215332734 ito mxs oau 1970-11-22 07:27:49.523820000 1939-03-17 18:35:27.517627000 15850593404349798456964840828206495.381 +-1296868002 214677364 pkv toa bvh 1992-10-31 15:37:58.012803000 1939-01-11 14:49:02.082757000 -14000376725023440674048215067501682.656 +40742166 1646726354 kfa rmx oju 1947-10-22 03:35:53.023866000 1963-06-13 10:07:17.684362000 -9721858828588660878613398915464191.144 +121352676 -1216388566 upk yto bvq 1946-07-15 07:29:06.004223000 1987-12-12 13:07:09.551855000 -12490693298238943310545795173438693.052 +1566509066 -1217043936 lwr pbv gbv 1968-06-23 15:39:14.493206000 1987-10-08 09:20:44.116985000 -21074015495053528475097938104661357.874 +\N 215201660 gbm nyt kvq 1996-07-17 22:07:35.058095000 1939-02-06 16:19:36.256705000 -16795497598618748679663121952623866.362 +-1390979134 1646988502 qlw upb wrd 1958-09-21 13:39:49.183413000 1938-12-29 14:03:44.995783000 1708507923466088009786041385303346.159 +-1310368624 1646333132 xsn cwr cwi 1944-02-17 21:51:33.065548000 1963-05-31 09:22:00.597388000 14392833659210156811694811418566194.554 +134984372 -1216519640 cni aup wrm 1935-08-27 22:10:26.931650000 1987-11-03 10:51:18.290933000 18671351555644936607129627570603686.066 +1472397934 215463808 mxs qcw sey 1970-12-05 08:13:06.610794000 1987-09-25 08:35:27.030011000 15892132413053048746240907004439869.085 +1553008444 214808438 toa xse xje 1992-11-13 16:23:15.099777000 1939-01-24 15:34:19.169731000 -13958837716320190384772148891268308.952 +\N 1646857428 oje vqc sny 1947-11-04 04:21:10.110840000 1963-06-26 10:52:34.771336000 -9680319819885410589337332739230817.44 +-1296736928 -1216257492 yto dxs fau 1946-07-28 08:14:23.091197000 1963-05-18 08:36:43.510414000 
-12449154289535693021269728997205319.348 +40873240 -1216912862 pbv tfa kfa 1968-07-06 16:24:31.580180000 1987-10-21 10:06:01.203959000 -21032476486350278185821871928427984.17 +121483750 215332734 kfq rdx oau 1996-07-30 22:52:52.145068000 1939-02-19 17:04:53.343679000 -16753958589915498390387055776390492.658 +1566640140 214677364 upb ytf bvh 1958-10-04 14:25:06.270386000 1939-01-11 14:49:02.082757000 1750046932169338299062107561536719.863 +-1390848060 1646464206 \N gbv gbm 1944-03-01 22:36:50.152522000 1963-06-13 10:07:17.684362000 14434372667913407100970877594799568.258 +\N -1216388566 cwr eyt bvq 1935-09-09 22:55:44.018624000 1987-11-16 11:36:35.377907000 18712890564348186896405693746837059.77 +-1310237550 -1217043936 grm ugb gbv 1970-12-18 08:58:23.697768000 1987-10-08 09:20:44.116985000 15933671421756299035516973180673242.789 +135115446 214939512 qcw cwi cni 1992-11-26 17:08:32.186751000 1939-02-06 16:19:36.256705000 -13917298707616940095496082715034935.248 +1472529008 1646988502 xse aug wrd 1947-11-17 05:06:27.197814000 1963-07-09 11:37:51.858310000 -9638780811182160300061266562997443.736 +1553139518 1646333132 sni hcw cwi 1946-08-10 08:59:40.178171000 1963-05-31 09:22:00.597388000 -12407615280832442731993662820971945.644 +-1296605854 -1216781788 dxs xje oje 1968-07-19 17:09:48.667154000 1987-11-03 10:51:18.290933000 -20990937477647027896545805752194610.466 +\N 215463808 tfa vhc sey 1996-08-12 23:38:09.232042000 1939-03-04 17:50:10.430653000 -16712419581212248101110989600157118.954 +41004314 214808438 oju dxj xje 1958-10-17 15:10:23.357360000 1939-01-24 15:34:19.169731000 1791585940872588588338173737770093.567 +121614824 1646595280 ytf kfa kfq 1944-03-14 23:22:07.239496000 1963-06-26 10:52:34.771336000 14475911676616657390246943771032941.962 +1566771214 -1216257492 gbv idx fau 1935-09-22 23:41:01.105598000 1987-11-29 12:21:52.464881000 18754429573051437185681759923070433.474 +-1390716986 -1216912862 kvq ykf kfa 1970-12-31 09:43:40.784742000 1987-10-21 10:06:01.203959000 15975210430459549324793039356906616.493 +-1310106476 215070586 ugb gbm grm 1992-12-09 17:53:49.273725000 1939-02-19 17:04:53.343679000 -13875759698913689806220016538801561.544 +\N 214677364 cwi eyk bvh 1947-11-30 05:51:44.284788000 1963-07-22 12:23:08.945284000 -9597241802478910010785200386764070.032 +135246520 1646464206 wrm lgb gbm 1946-08-23 09:44:57.265145000 1963-06-13 10:07:17.684362000 -12366076272129192442717596644738571.94 +1472660082 -1216650714 hcw cni sni 1968-08-01 17:55:05.754128000 1987-11-16 11:36:35.377907000 -20949398468943777607269739575961236.762 +1553270592 -1217043936 xje alg gbv 1996-08-26 00:23:26.319016000 1939-03-17 18:35:27.517627000 -16670880572508997811834923423923745.25 +-1296474780 214939512 sny hcn cni 1958-10-30 15:55:40.444334000 1939-02-06 16:19:36.256705000 1833124949575838877614239914003467.271 +41135388 1646726354 dxj oje oju 1944-03-28 00:07:24.326470000 1963-07-09 11:37:51.858310000 14517450685319907679523009947266315.666 +\N 1646333132 kfa mhc cwi 1935-10-06 00:26:18.192572000 1987-12-12 13:07:09.551855000 18795968581754687474957826099303807.178 +121745898 -1216781788 oau doj oje 1971-01-13 10:28:57.871716000 1987-11-03 10:51:18.290933000 16016749439162799614069105533139990.197 +1566902288 215201660 ykf kfq kvq 1992-12-22 18:39:06.360699000 1939-03-04 17:50:10.430653000 -13834220690210439516943950362568187.84 +-1390585912 214808438 gbm ido xje 1947-12-13 06:37:01.371762000 1938-12-29 14:03:44.995783000 -9555702793775659721509134210530696.328 +-1309975402 1646595280 bvq pkf kfq 
1946-09-05 10:30:14.352119000 1963-06-26 10:52:34.771336000 -12324537263425942153441530468505198.236 +135377594 -1216519640 lgb grm wrm 1968-08-14 18:40:22.841102000 1987-11-29 12:21:52.464881000 -20907859460240527317993673399727863.058 +\N -1216912862 cni epk kfa 1996-09-08 01:08:43.405990000 1987-09-25 08:35:27.030011000 -16629341563805747522558857247690371.546 +1472791156 215070586 wrd lgr grm 1958-11-12 16:40:57.531308000 1939-02-19 17:04:53.343679000 1874663958279089166890306090236840.975 +1553401666 1646857428 hcn sni sny 1944-04-10 00:52:41.413444000 1963-07-22 12:23:08.945284000 14558989694023157968799076123499689.37 +-1296343706 1646464206 oje qlg gbm 1935-10-19 01:11:35.279546000 1963-05-18 08:36:43.510414000 18837507590457937764233892275537180.882 +41266462 -1216650714 sey hsn sni 1971-01-26 11:14:14.958690000 1987-11-16 11:36:35.377907000 16058288447866049903345171709373363.901 +121876972 215332734 doj oju oau 1993-01-04 19:24:23.447673000 1939-03-17 18:35:27.517627000 -13792681681507189227667884186334814.136 +\N 214939512 kfq mhs cni 1947-12-26 07:22:18.458736000 1939-01-11 14:49:02.082757000 -9514163785072409432233068034297322.624 +1567033362 1646726354 fau toj oju 1946-09-18 11:15:31.439093000 1963-07-09 11:37:51.858310000 -12282998254722691864165464292271824.532 +-1390454838 -1216388566 pkf kvq bvq 1968-08-27 19:25:39.928076000 1987-12-12 13:07:09.551855000 -20866320451537277028717607223494489.354 +-1309844328 -1216781788 grm \N oje 1996-09-21 01:54:00.492964000 1987-10-08 09:20:44.116985000 -16587802555102497233282791071456997.842 +135508668 215201660 bvh ito kvq 1958-11-25 17:26:14.618282000 1939-03-04 17:50:10.430653000 1916202966982339456166372266470214.679 +1472922230 1646988502 lgr pkv wrd 1944-04-23 01:37:58.500418000 1938-12-29 14:03:44.995783000 14600528702726408258075142299733063.074 +\N 1646595280 sni wrm kfq 1935-11-01 01:56:52.366520000 1963-05-31 09:22:00.597388000 18879046599161188053509958451770554.586 +1553532740 -1216519640 wid upk wrm 1971-02-08 11:59:32.045664000 1987-11-29 12:21:52.464881000 16099827456569300192621237885606737.605 +-1296212632 215463808 hsn lwr sey 1993-01-17 20:09:40.534647000 1987-09-25 08:35:27.030011000 -13751142672803938938391818010101440.432 +41397536 215070586 oju sny grm 1948-01-08 08:07:35.545710000 1939-01-24 15:34:19.169731000 -9472624776369159142957001858063948.92 +122008046 1646857428 jey qlw sny 1946-10-01 12:00:48.526067000 1963-07-22 12:23:08.945284000 \N +1567164436 -1216257492 toj xsn fau 1968-09-09 20:10:57.015050000 1963-05-18 08:36:43.510414000 -12241459246019441574889398116038450.828 +\N -1216650714 kvq oau sni 1996-10-04 02:39:17.579938000 1987-10-21 10:06:01.203959000 -20824781442834026739441541047261115.65 +-1390323764 215332734 fal mxs oau 1958-12-08 18:11:31.705256000 1939-03-17 18:35:27.517627000 -16546263546399246944006724895223624.138 +-1309713254 214677364 pkv toa bvh 1944-05-06 02:23:15.587392000 1939-01-11 14:49:02.082757000 1957741975685589745442438442703588.383 +135639742 1646726354 wrm bvq oju 1935-11-14 02:42:09.453494000 1963-06-13 10:07:17.684362000 14642067711429658547351208475966436.778 +1473053304 -1216388566 bmh yto bvq 1971-02-21 12:44:49.132638000 1987-12-12 13:07:09.551855000 18920585607864438342786024628003928.29 +1553663814 -1217043936 lwr pbv gbv 1993-01-30 20:54:57.621621000 1987-10-08 09:20:44.116985000 16141366465272550481897304061840111.309 +\N 215201660 sny wrd kvq 1948-01-21 08:52:52.632684000 1939-02-06 16:19:36.256705000 -13709603664100688649115751833868066.728 +-1296081558 
1646988502 nid upb wrd 1946-10-14 12:46:05.613041000 1938-12-29 14:03:44.995783000 -9431085767665908853680935681830575.216 +41528610 1646333132 xsn cwr cwi 1968-09-22 20:56:14.102024000 1963-05-31 09:22:00.597388000 -12199920237316191285613331939805077.124 +122139120 -1216519640 oau sey wrm 1996-10-17 03:24:34.666912000 1987-11-03 10:51:18.290933000 -20783242434130776450165474871027741.946 +1567295510 215463808 jep qcw sey 1958-12-21 18:56:48.792230000 1987-09-25 08:35:27.030011000 -16504724537695996654730658718990250.434 +-1390192690 214808438 toa xse xje 1944-05-19 03:08:32.674366000 1939-01-24 15:34:19.169731000 1999280984388840034718504618936962.087 +\N 1646857428 bvq fau sny 1935-11-27 03:27:26.540468000 1963-06-26 10:52:34.771336000 14683606720132908836627274652199810.482 +-1309582180 -1216257492 fql dxs fau 1971-03-06 13:30:06.219612000 1963-05-18 08:36:43.510414000 18962124616567688632062090804237301.994 +135770816 -1216912862 pbv tfa kfa 1993-02-12 21:40:14.708595000 1987-10-21 10:06:01.203959000 16182905473975800771173370238073485.013 +1473184378 215332734 wrd bvh oau 1948-02-03 09:38:09.719658000 1939-02-19 17:04:53.343679000 -13668064655397438359839685657634693.024 +1553794888 214677364 rmh ytf bvh 1946-10-27 13:31:22.700015000 1939-01-11 14:49:02.082757000 -9389546758962658564404869505597201.512 +-1295950484 1646464206 cwr gbv gbm 1968-10-05 21:41:31.188998000 1963-06-13 10:07:17.684362000 -12158381228612940996337265763571703.42 +\N -1216388566 sey wid bvq 1996-10-30 04:09:51.753886000 1987-11-16 11:36:35.377907000 -20741703425427526160889408694794368.242 +41659684 -1217043936 nit ugb gbv 1959-01-03 19:42:05.879204000 1987-10-08 09:20:44.116985000 -16463185528992746365454592542756876.73 +122270194 214939512 xse cwi cni 1944-06-01 03:53:49.761340000 1939-02-06 16:19:36.256705000 2040819993092090323994570795170335.791 +1567426584 1646988502 fau jey wrd 1935-12-10 04:12:43.627442000 1963-07-09 11:37:51.858310000 14725145728836159125903340828433184.186 +-1390061616 1646333132 jup hcw cwi 1971-03-19 14:15:23.306586000 1963-05-31 09:22:00.597388000 19003663625270938921338156980470675.698 +-1309451106 -1216781788 tfa xje oje 1993-02-25 22:25:31.795569000 1987-11-03 10:51:18.290933000 16224444482679051060449436414306858.717 +\N 215463808 bvh fal sey 1948-02-16 10:23:26.806632000 1939-03-04 17:50:10.430653000 -13626525646694188070563619481401319.32 +135901890 214808438 vql dxj xje 1946-11-09 14:16:39.786989000 1939-01-24 15:34:19.169731000 -9348007750259408275128803329363827.808 +1473315452 1646595280 gbv kfa kfq 1968-10-18 22:26:48.275971000 1963-06-26 10:52:34.771336000 -12116842219909690707061199587338329.716 +1553925962 -1216257492 wid bmh fau 1996-11-12 04:55:08.840860000 1987-11-29 12:21:52.464881000 -20700164416724275871613342518560994.538 +-1295819410 -1216912862 rmx ykf \N 1959-01-16 20:27:22.966178000 1987-10-21 10:06:01.203959000 -16421646520289496076178526366523503.026 +41790758 215070586 cwi gbm kfa 1944-06-14 04:39:06.848314000 \N 2082359001795340613270636971403709.495 +\N 214677364 jey nid grm 1935-12-23 04:58:00.714416000 1939-02-19 17:04:53.343679000 14766684737539409415179407004666557.89 +122401268 1646464206 nyt lgb bvh 1971-04-01 15:00:40.393560000 1963-07-22 12:23:08.945284000 19045202633974189210614223156704049.402 +1567557658 -1216650714 xje cni gbm 1993-03-10 23:10:48.882543000 \N 16265983491382301349725502590540232.421 +-1389930542 -1217043936 fal jep sni 1948-02-29 11:08:43.893605000 1963-06-13 10:07:17.684362000 -13584986637990937781287553305167945.616 
+-1309320032 214939512 aup hcn gbv 1946-11-22 15:01:56.873962000 1987-11-16 11:36:35.377907000 -9306468741556157985852737153130454.104 +136032964 1646726354 kfa oje cni 1968-10-31 23:12:05.362945000 1939-03-17 18:35:27.517627000 -12075303211206440417785133411104956.012 +\N 1646333132 bmh fql oju 1996-11-25 05:40:25.927834000 1939-02-06 16:19:36.256705000 -20658625408021025582337276342327620.834 +1473446526 -1216781788 vqc doj cwi 1959-01-29 21:12:40.053152000 1963-07-09 11:37:51.858310000 -16380107511586245786902460190290129.322 +1554057036 215201660 gbm kfq oje 1944-06-27 05:24:23.935288000 1987-12-12 13:07:09.551855000 2123898010498590902546703147637083.199 +-1295688336 214808438 nid upb kvq 1936-01-05 05:43:17.801390000 1987-11-03 10:51:18.290933000 14808223746242659704455473180899931.594 +41921832 1646595280 rdx pkf xje 1971-04-14 15:45:57.480534000 1939-03-04 17:50:10.430653000 19086741642677439499890289332937423.106 +122532342 -1216519640 cni grm kfq 1993-03-23 23:56:05.969517000 1938-12-29 14:03:44.995783000 16307522500085551639001568766773606.125 +\N -1216912862 jep qcw wrm 1948-03-13 11:54:00.980579000 1963-06-26 10:52:34.771336000 -13543447629287687492011487128934571.912 +1567688732 215070586 eyt lgr kfa 1946-12-05 15:47:13.960936000 1987-11-29 12:21:52.464881000 -9264929732852907696576670976897080.4 +-1389799468 1646857428 oje sni grm 1968-11-13 23:57:22.449919000 1987-09-25 08:35:27.030011000 -12033764202503190128509067234871582.308 +-1309188958 1646464206 fql dxs sny 1996-12-08 06:25:43.014808000 1939-02-19 17:04:53.343679000 -20617086399317775293061210166094247.13 +136164038 -1216650714 aug hsn gbm 1959-02-11 21:57:57.140126000 1963-07-22 12:23:08.945284000 -16338568502882995497626394014056755.618 +1473577600 215332734 kfq oju sni 1944-07-10 06:09:41.022262000 1963-05-18 08:36:43.510414000 2165437019201841191822769323870456.903 +\N 214939512 rmh ytf oau 1936-01-18 06:28:34.888364000 1987-11-16 11:36:35.377907000 14849762754945909993731539357133305.298 +1554188110 1646726354 vhc toj cni 1971-04-27 16:31:14.567508000 1939-03-17 18:35:27.517627000 19128280651380689789166355509170796.81 +-1295557262 -1216388566 grm kvq oju 1993-04-06 00:41:23.056491000 1939-01-11 14:49:02.082757000 16349061508788801928277634943006979.829 +42052906 -1216781788 nit ugb bvq 1948-03-26 12:39:18.067553000 1963-07-09 11:37:51.858310000 -13501908620584437202735420952701198.208 +122663416 215201660 idx pkv oje 1946-12-18 16:32:31.047910000 1987-12-12 13:07:09.551855000 -9223390724149657407300604800663706.696 +1567819806 1646988502 sni wrm kvq 1968-11-27 00:42:39.536893000 1987-10-08 09:20:44.116985000 -11992225193799939839233001058638208.604 +\N 1646595280 jup hcw wrd 1996-12-21 07:11:00.101782000 1939-03-04 17:50:10.430653000 -20575547390614525003785143989860873.426 +-1389668394 -1216519640 eyk lwr kfq 1959-02-24 22:43:14.227100000 1938-12-29 14:03:44.995783000 -16297029494179745208350327837823381.914 +-1309057884 215463808 oju sny wrm 1944-07-23 06:54:58.109236000 1963-05-31 09:22:00.597388000 2206976027905091481098835500103830.607 +136295112 215070586 vql dxj sey 1936-01-31 07:13:51.975338000 1987-11-29 12:21:52.464881000 14891301763649160283007605533366679.002 +1473708674 1646857428 alg xsn grm 1971-05-10 17:16:31.654482000 1987-09-25 08:35:27.030011000 19169819660083940078442421685404170.514 +1554319184 -1216257492 kvq oau sny 1993-04-19 01:26:40.143465000 1939-01-24 15:34:19.169731000 16390600517492052217553701119240353.533 +\N -1216650714 rmx ykf fau 1948-04-08 13:24:35.154527000 1963-07-22 
12:23:08.945284000 -13460369611881186913459354776467824.504 +-1295426188 215332734 mhc toa sni 1946-12-31 17:17:48.134884000 1963-05-18 08:36:43.510414000 -9181851715446407118024538624430332.992 +42183980 214677364 wrm bvq oau 1968-12-10 01:27:56.623867000 1987-10-21 10:06:01.203959000 -11950686185096689549956934882404834.9 +122794490 1646726354 nyt lgb bvh 1997-01-03 07:56:17.188756000 1939-03-17 18:35:27.517627000 -20534008381911274714509077813627499.722 +1567950880 -1216388566 ido pbv oju 1959-03-09 23:28:31.314074000 1939-01-11 14:49:02.082757000 -16255490485476494919074261661590008.21 +-1389537320 -1217043936 sny wrd bvq 1944-08-05 07:40:15.196210000 1963-06-13 10:07:17.684362000 2248515036608341770374901676337204.311 +\N 215201660 aup hcn gbv 1936-02-13 07:59:09.062312000 1987-12-12 13:07:09.551855000 14932840772352410572283671709600052.706 +-1308926810 1646988502 epk cwr kvq 1971-05-23 18:01:48.741456000 1987-10-08 09:20:44.116985000 19211358668787190367718487861637544.218 +136426186 1646333132 oau sey wrd 1993-05-02 02:11:57.230438000 1939-02-06 16:19:36.256705000 16432139526195302506829767295473727.237 +1473839748 -1216519640 vqc doj cwi 1948-04-21 14:09:52.241501000 1938-12-29 14:03:44.995783000 -13418830603177936624183288600234450.8 +1554450258 215463808 qlg xse wrm 1947-01-13 18:03:05.221858000 1963-05-31 09:22:00.597388000 -9140312706743156828748472448196959.288 +-1295295114 214808438 bvq fau sey 1968-12-23 02:13:13.710841000 1987-11-03 10:51:18.290933000 -11909147176393439260680868706171461.196 +\N 1646857428 rdx pkf xje 1997-01-16 08:41:34.275730000 1987-09-25 08:35:27.030011000 -20492469373208024425233011637394126.018 +42315054 -1216257492 mhs tfa sny 1959-03-23 00:13:48.401048000 1939-01-24 15:34:19.169731000 -16213951476773244629798195485356634.506 +122925564 -1216912862 wrd bvh fau 1944-08-18 08:25:32.283184000 1963-06-26 10:52:34.771336000 2290054045311592059650967852570578.015 +1568081954 215332734 eyt lgr kfa 1936-02-26 08:44:26.149286000 1963-05-18 08:36:43.510414000 14974379781055660861559737885833426.41 +-1389406246 214677364 ito gbv oau \N 1987-10-21 10:06:01.203959000 19252897677490440656994554037870917.922 +-1308795736 1646464206 sey wid bvh 1971-06-05 18:47:05.828429000 1939-02-19 17:04:53.343679000 16473678534898552796105833471707100.941 +\N -1216388566 aug hsn gbm 1993-05-15 02:57:14.317412000 1939-01-11 14:49:02.082757000 -13377291594474686334907222424001077.096 +136557260 -1217043936 upk cwi bvq 1948-05-04 14:55:09.328475000 1963-06-13 10:07:17.684362000 -9098773698039906539472406271963585.584 +1473970822 214939512 fau jey gbv 1947-01-26 18:48:22.308832000 1987-11-16 11:36:35.377907000 -11867608167690188971404802529938087.492 +1554581332 1646988502 vhc toj cni 1969-01-05 02:58:30.797815000 1987-10-08 09:20:44.116985000 -20450930364504774135956945461160752.314 +-1295164040 1646333132 qlw xje wrd 1997-01-29 09:26:51.362704000 1939-02-06 16:19:36.256705000 -16172412468069994340522129309123260.802 +42446128 -1216781788 bvh fal cwi 1959-04-05 00:59:05.488022000 1963-07-09 11:37:51.858310000 2331593054014842348927034028803951.719 +\N 215463808 idx pkv oje 1944-08-31 09:10:49.370158000 1963-05-31 09:22:00.597388000 15015918789758911150835804062066800.114 +123056638 214808438 mxs kfa sey 1936-03-10 09:29:43.236260000 1987-11-03 10:51:18.290933000 19294436686193690946270620214104291.626 +1568213028 1646595280 wid bmh xje 1971-06-18 19:32:22.915403000 1939-03-04 17:50:10.430653000 16515217543601803085381899647940474.645 +-1389275172 -1216257492 eyk lwr kfq 1993-05-28 
03:42:31.404386000 1939-01-24 15:34:19.169731000 -13335752585771436045631156247767703.392 +-1308664662 -1216912862 yto gbm fau 1948-05-17 15:40:26.415449000 1963-06-26 10:52:34.771336000 -9057234689336656250196340095730211.88 +136688334 215070586 jey nid kfa 1947-02-08 19:33:39.395806000 1987-11-29 12:21:52.464881000 -11826069158986938682128736353704713.788 +\N 214677364 alg xsn grm 1969-01-18 03:43:47.884789000 1987-10-21 10:06:01.203959000 -20409391355801523846680879284927378.61 +1474101896 1646464206 upb cni bvh 1997-02-11 10:12:08.449678000 1939-02-19 17:04:53.343679000 -16130873459366744051246063132889887.098 +1554712406 -1216650714 fal jep gbm 1959-04-18 01:44:22.574996000 1963-07-22 12:23:08.945284000 2373132062718092638203100205037325.423 +-1295032966 -1217043936 mhc toa sni 1944-09-13 09:56:06.457132000 1963-06-13 10:07:17.684362000 15057457798462161440111870238300173.818 +42577202 214939512 qcw oje gbv 1936-03-23 10:15:00.323234000 1987-11-16 11:36:35.377907000 19335975694896941235546686390337665.33 +123187712 1646726354 bmh fql cni 1971-07-01 20:17:40.002377000 1939-03-17 18:35:27.517627000 16556756552305053374657965824173848.349 +\N 1646333132 ido pbv oju 1993-06-10 04:27:48.491360000 1939-02-06 16:19:36.256705000 -13294213577068185756355090071534329.688 +1568344102 -1216781788 dxs kfq cwi 1948-05-30 16:25:43.502423000 1963-07-09 11:37:51.858310000 -9015695680633405960920273919496838.176 +-1389144098 215201660 nid rmh oje 1947-02-21 20:18:56.482780000 1987-12-12 13:07:09.551855000 -11784530150283688392852670177471340.084 +-1308533588 214808438 epk cwr kvq 1969-01-31 04:29:04.971763000 1987-11-03 10:51:18.290933000 -20367852347098273557404813108694004.906 +136819408 1646595280 ytf grm xje 1997-02-24 10:57:25.536652000 1939-03-04 17:50:10.430653000 -16089334450663493761969996956656513.394 +1474232970 -1216519640 jep nit kfq 1959-05-01 02:29:39.661970000 1938-12-29 14:03:44.995783000 2414671071421342927479166381270699.127 +\N -1216912862 qlg xse wrm 1944-09-26 10:41:23.544106000 1963-06-26 10:52:34.771336000 15098996807165411729387936414533547.522 +1554843480 215070586 \N sni kfa 1936-04-05 11:00:17.410208000 1987-11-29 12:21:52.464881000 19377514703600191524822752566571039.034 +-1294901892 1646857428 ugb jup grm 1971-07-14 21:02:57.089351000 1987-09-25 08:35:27.030011000 16598295561008303663934032000407222.053 +42708276 1646464206 fql tfa sny 1993-06-23 05:13:05.578334000 1939-02-19 17:04:53.343679000 -13252674568364935467079023895300955.984 +123318786 -1216650714 mhs oju gbm 1948-06-12 17:11:00.589397000 1963-07-22 12:23:08.945284000 -8974156671930155671644207743263464.472 +1568475176 215332734 hcw vql sni 1947-03-06 21:04:13.569754000 1963-05-18 08:36:43.510414000 -11742991141580438103576604001237966.38 +\N 214939512 rmh gbv oau 1969-02-13 05:14:22.058737000 1987-11-16 11:36:35.377907000 -20326313338395023268128746932460631.202 +-1389013024 1646726354 ito kvq cni 1997-03-09 11:42:42.623626000 1939-03-17 18:35:27.517627000 -16047795441960243472693930780423139.69 +-1308402514 -1216388566 dxj rmx oju 1959-05-14 03:14:56.748944000 1939-01-11 14:49:02.082757000 2456210080124593216755232557504072.831 +136950482 -1216781788 nit cwi bvq 1944-10-09 11:26:40.631080000 1963-07-09 11:37:51.858310000 15140535815868662018664002590766921.226 +1474364044 215201660 upk wrm oje 1936-04-18 11:45:34.497182000 1987-12-12 13:07:09.551855000 19419053712303441814098818742804412.738 +1554974554 1646988502 ykf nyt kvq 1971-07-27 21:48:14.176325000 1987-10-08 09:20:44.116985000 
16639834569711553953210098176640595.757 +\N 1646595280 jup xje wrd 1993-07-06 05:58:22.665308000 1939-03-04 17:50:10.430653000 -13211135559661685177802957719067582.28 +-1294770818 -1216519640 qlw sny kfq 1948-06-25 17:56:17.676371000 1938-12-29 14:03:44.995783000 -8932617663226905382368141567030090.768 +42839350 215463808 lgb aup wrm 1947-03-19 21:49:30.656728000 1963-05-31 09:22:00.597388000 -11701452132877187814300537825004592.676 +123449860 215070586 vql kfa sey 1969-02-26 05:59:39.145711000 1987-11-29 12:21:52.464881000 -20284774329691772978852680756227257.498 +1568606250 1646857428 mxs oau grm 1997-03-22 12:27:59.710600000 1987-09-25 08:35:27.030011000 -16006256433256993183417864604189765.986 +-1388881950 -1216257492 hcn vqc sny 1959-05-27 04:00:13.835918000 1939-01-24 15:34:19.169731000 \N +\N -1216650714 rmx gbm fau 1944-10-22 12:11:57.718054000 1963-07-22 12:23:08.945284000 2497749088827843506031298733737446.535 +-1308271440 215332734 yto bvq sni 1936-05-01 12:30:51.584155000 1963-05-18 08:36:43.510414000 15182074824571912307940068767000294.93 +137081556 214677364 doj rdx oau 1971-08-09 22:33:31.263299000 1987-10-21 10:06:01.203959000 19460592721006692103374884919037786.442 +1474495118 1646726354 nyt cni bvh 1993-07-19 06:43:39.752282000 1939-03-17 18:35:27.517627000 16681373578414804242486164352873969.461 +1555105628 -1216388566 upb wrd oju 1948-07-08 18:41:34.763345000 1939-01-11 14:49:02.082757000 -13169596550958434888526891542834208.576 +-1294639744 -1217043936 pkf eyt bvq 1947-04-01 22:34:47.743702000 1963-06-13 10:07:17.684362000 -8891078654523655093092075390796717.064 +\N 215201660 aup oje gbv 1969-03-11 06:44:56.232685000 1987-12-12 13:07:09.551855000 -11659913124173937525024471648771218.972 +42970424 1646988502 qcw sey kvq 1997-04-04 13:13:16.797574000 1987-10-08 09:20:44.116985000 -20243235320988522689576614579993883.794 +123580934 1646333132 lgr aug wrd 1959-06-09 04:45:30.922892000 1939-02-06 16:19:36.256705000 -15964717424553742894141798427956392.282 +1568737324 -1216519640 vqc kfq cwi 1944-11-04 12:57:14.805028000 1938-12-29 14:03:44.995783000 2539288097531093795307364909970820.239 +-1388750876 215463808 dxs fau wrm 1936-05-14 13:16:08.671129000 1963-05-31 09:22:00.597388000 15223613833275162597216134943233668.634 +-1308140366 214808438 hsn vhc sey 1971-08-22 23:18:48.350273000 1987-11-03 10:51:18.290933000 19502131729709942392650951095271160.146 +\N 1646857428 rdx grm xje 1993-08-01 07:28:56.839256000 1987-09-25 08:35:27.030011000 16722912587118054531762230529107343.165 +137212630 -1216257492 ytf bvh sny 1948-07-21 19:26:51.850319000 1939-01-24 15:34:19.169731000 -13128057542255184599250825366600834.872 +1474626192 -1216912862 toj idx fau 1947-04-14 23:20:04.830676000 1963-06-26 10:52:34.771336000 -8849539645820404803816009214563343.36 +1555236702 215332734 eyt sni kfa 1969-03-24 07:30:13.319659000 1963-05-18 08:36:43.510414000 -11618374115470687235748405472537845.268 +-1294508670 214677364 ugb wid oau 1997-04-17 13:58:33.884548000 1987-10-21 10:06:01.203959000 -20201696312285272400300548403760510.09 +43101498 1646464206 pkv eyk bvh 1959-06-22 05:30:48.009866000 1939-02-19 17:04:53.343679000 -15923178415850492604865732251723018.578 +\N -1216388566 aug oju gbm 1944-11-17 13:42:31.892002000 1939-01-11 14:49:02.082757000 2580827106234344084583431086204193.943 +123712008 -1217043936 hcw jey bvq 1936-05-27 14:01:25.758103000 1963-06-13 10:07:17.684362000 15265152841978412886492201119467042.338 +1568868398 214939512 lwr alg gbv 1971-09-05 00:04:05.437247000 1987-11-16 
11:36:35.377907000 19543670738413192681927017271504533.85 +-1388619802 1646988502 vhc kvq cni 1993-08-14 08:14:13.926230000 1987-10-08 09:20:44.116985000 16764451595821304821038296705340716.869 +-1308009292 1646333132 dxj fal wrd 1948-08-03 20:12:08.937293000 1939-02-06 16:19:36.256705000 -13086518533551934309974759190367461.168 +137343704 -1216781788 xsn mhc cwi 1947-04-28 00:05:21.917650000 1963-07-09 11:37:51.858310000 -8808000637117154514539943038329969.656 +\N 215463808 idx wrm oje 1969-04-06 08:15:30.406633000 1963-05-31 09:22:00.597388000 -11576835106767436946472339296304471.564 +1474757266 214808438 ykf bmh sey 1997-04-30 14:43:50.971522000 1987-11-03 10:51:18.290933000 -20160157303582022111024482227527136.386 +1555367776 1646595280 toa ido xje 1959-07-05 06:16:05.096840000 1939-03-04 17:50:10.430653000 -15881639407147242315589666075489644.874 +-1294377596 -1216257492 eyk sny kfq 1944-11-30 14:27:48.978976000 1939-01-24 15:34:19.169731000 2622366114937594373859497262437567.647 +43232572 -1216912862 lgb nid fau 1936-06-09 14:46:42.845077000 1963-06-26 10:52:34.771336000 15306691850681663175768267295700416.042 +123843082 215070586 pbv epk kfa 1971-09-18 00:49:22.524221000 1987-11-29 12:21:52.464881000 19585209747116442971203083447737907.554 +\N 214677364 alg oau grm 1993-08-27 08:59:31.013204000 1987-10-21 10:06:01.203959000 16805990604524555110314362881574090.573 +1568999472 1646464206 hcn jep bvh 1948-08-16 20:57:26.024267000 1939-02-19 17:04:53.343679000 -13044979524848684020698693014134087.464 +-1388488728 -1216650714 cwr qlg gbm 1947-05-11 00:50:39.004624000 1963-07-22 12:23:08.945284000 -8766461628413904225263876862096595.952 +-1307878218 -1217043936 mhc bvq sni 1969-04-19 09:00:47.493607000 1963-06-13 10:07:17.684362000 -11535296098064186657196273120071097.86 +137474778 214939512 doj fql gbv 1997-05-13 15:29:08.058495000 1987-11-16 11:36:35.377907000 -20118618294878771821748416051293762.682 +1474888340 1646726354 xse mhs cni 1959-07-18 07:01:22.183813000 1939-03-17 18:35:27.517627000 -15840100398443992026313599899256271.17 +\N 1646333132 ido wrd oju 1944-12-13 15:13:06.065949000 1939-02-06 16:19:36.256705000 2663905123640844663135563438670941.351 +1555498850 -1216781788 pkf rmh cwi 1936-06-22 15:31:59.932051000 1963-07-09 11:37:51.858310000 15348230859384913465044333471933789.746 +-1294246522 215201660 tfa ito oje 1971-10-01 01:34:39.611195000 1987-12-12 13:07:09.551855000 19626748755819693260479149623971281.258 +43363646 214808438 epk sey kvq 1993-09-09 09:44:48.100178000 1987-11-03 10:51:18.290933000 16847529613227805399590429057807464.277 +123974156 1646595280 lgr nit xje 1948-08-29 21:42:43.111241000 1939-03-04 17:50:10.430653000 -13003440516145433731422626837900713.76 +1569130546 -1216519640 gbv upk kfq 1947-05-24 01:35:56.091598000 1938-12-29 14:03:44.995783000 -8724922619710653935987810685863222.248 +\N -1216912862 qlg fau wrm 1969-05-02 09:46:04.580581000 1963-06-26 10:52:34.771336000 -11493757089360936367920206943837724.156 +-1388357654 215070586 hsn jup kfa 1997-05-26 16:14:25.145469000 1987-11-29 12:21:52.464881000 -20077079286175521532472349875060388.978 +-1307747144 1646857428 cwi qlw grm 1959-07-31 07:46:39.270787000 1987-09-25 08:35:27.030011000 -15798561389740741737037533723022897.466 +137605852 1646464206 mhs bvh sny 1944-12-26 15:58:23.152923000 1939-02-19 17:04:53.343679000 2705444132344094952411629614904315.055 +1475019414 -1216650714 toj vql gbm 1936-07-05 16:17:17.019025000 1963-07-22 12:23:08.945284000 15389769868088163754320399648167163.45 +1555629924 
215332734 xje mxs sni 1971-10-14 02:19:56.698169000 1963-05-18 08:36:43.510414000 19668287764522943549755215800204654.962 +\N 214939512 ito wid oau 1993-09-22 10:30:05.187152000 1987-11-16 11:36:35.377907000 16889068621931055688866495234040837.981 +-1294115448 1646726354 pkv rmx cni 1948-09-11 22:28:00.198215000 1939-03-17 18:35:27.517627000 -12961901507442183442146560661667340.056 +43494720 -1216388566 kfa yto oju 1947-06-06 02:21:13.178572000 1939-01-11 14:49:02.082757000 -8683383611007403646711744509629848.544 +124105230 -1216781788 upk jey bvq 1969-05-15 10:31:21.667555000 1963-07-09 11:37:51.858310000 -11452218080657686078644140767604350.452 +1569261620 215201660 lwr nyt oje 1997-06-08 16:59:42.232443000 1987-12-12 13:07:09.551855000 -20035540277472271243196283698827015.274 +-1388226580 1646988502 gbm upb kvq 1959-08-13 08:31:56.357761000 1987-10-08 09:20:44.116985000 -15757022381037491447761467546789523.762 +\N 1646595280 qlw fal wrd 1945-01-08 16:43:40.239897000 1939-03-04 17:50:10.430653000 2746983141047345241687695791137688.759 +-1307616070 -1216519640 xsn aup kfq 1936-07-18 17:02:34.105999000 1938-12-29 14:03:44.995783000 15431308876791414043596465824400537.154 +137736926 215463808 cni qcw wrm 1971-10-27 03:05:13.785143000 1963-05-31 09:22:00.597388000 19709826773226193839031281976438028.666 +1475150488 215070586 mxs bmh sey 1993-10-05 11:15:22.274126000 1987-11-29 12:21:52.464881000 16930607630634305978142561410274211.685 +1555760998 1646857428 toa vqc grm 1948-09-24 23:13:17.285189000 1987-09-25 08:35:27.030011000 -12920362498738933152870494485433966.352 +-1293984374 -1216257492 oje dxs sny 1947-06-19 03:06:30.265546000 1939-01-24 15:34:19.169731000 -8641844602304153357435678333396474.84 +\N -1216650714 yto nid fau 1969-05-28 11:16:38.754529000 1963-07-22 12:23:08.945284000 -11410679071954435789368074591370976.748 +43625794 215332734 pbv rdx sni 1997-06-21 17:44:59.319417000 1963-05-18 08:36:43.510414000 -19994001268769020953920217522593641.57 +124236304 214677364 kfq ytf oau 1959-08-26 09:17:13.444735000 1987-10-21 10:06:01.203959000 -15715483372334241158485401370556150.058 +1569392694 1646726354 upb jep bvh 1945-01-21 17:28:57.326871000 1939-03-17 18:35:27.517627000 2788522149750595530963761967371062.463 +-1388095506 -1216388566 cwr eyt oju 1936-07-31 17:47:51.192973000 1939-01-11 14:49:02.082757000 15472847885494664332872532000633910.858 +-1307484996 -1217043936 grm ugb bvq 1971-11-09 03:50:30.872117000 1963-06-13 10:07:17.684362000 19751365781929444128307348152671402.37 +\N 215201660 qcw fql gbv 1993-10-18 12:00:39.361100000 1987-12-12 13:07:09.551855000 16972146639337556267418627586507585.389 +137868000 1646988502 xse aug kvq 1948-10-07 23:58:34.372163000 1987-10-08 09:20:44.116985000 -12878823490035682863594428309200592.648 +1475281562 1646333132 sni hcw wrd 1947-07-02 03:51:47.352520000 1939-02-06 16:19:36.256705000 -8600305593600903068159612157163101.136 +1555892072 -1216519640 dxs rmh cwi 1969-06-10 12:01:55.841503000 1938-12-29 14:03:44.995783000 -11369140063251185500092008415137603.044 +-1293853300 215463808 tfa vhc wrm 1997-07-04 18:30:16.406391000 1963-05-31 09:22:00.597388000 -19952462260065770664644151346360267.866 +43756868 214808438 oju dxj sey 1959-09-08 10:02:30.531709000 1987-11-03 10:51:18.290933000 -15673944363630990869209335194322776.354 +124367378 1646857428 ytf nit xje 1945-02-03 18:14:14.413845000 1987-09-25 08:35:27.030011000 2830061158453845820239828143604436.167 +1569523768 -1216257492 gbv idx sny 1936-08-13 18:33:08.279947000 1939-01-24 
15:34:19.169731000 15514386894197914622148598176867284.562 +-1387964432 -1216912862 kvq ykf fau 1971-11-22 04:35:47.959091000 1963-06-26 10:52:34.771336000 19792904790632694417583414328904776.074 +-1307353922 215332734 ugb jup kfa 1993-10-31 12:45:56.448074000 1963-05-18 08:36:43.510414000 17013685648040806556694693762740959.093 +137999074 214677364 cwi eyk oau 1948-10-21 00:43:51.459137000 1987-10-21 10:06:01.203959000 -12837284481332432574318362132967218.944 +1475412636 1646464206 wrm lgb bvh 1947-07-15 04:37:04.439494000 1939-02-19 17:04:53.343679000 -8558766584897652778883545980929727.432 +1556023146 -1216388566 hcw vql gbm 1969-06-23 12:47:12.928477000 1939-01-11 14:49:02.082757000 -11327601054547935210815942238904229.34 +-1293722226 -1217043936 xje alg bvq 1997-07-17 19:15:33.493365000 1963-06-13 10:07:17.684362000 -19910923251362520375368085170126894.162 +43887942 214939512 sny hcn gbv 1959-09-21 10:47:47.618683000 1987-11-16 11:36:35.377907000 -15632405354927740579933269018089402.65 +124498452 1646988502 dxj rmx cni 1945-02-16 18:59:31.500819000 1987-10-08 09:20:44.116985000 2871600167157096109515894319837809.871 +1569654842 1646333132 kfa mhc wrd 1936-08-26 19:18:25.366921000 1939-02-06 16:19:36.256705000 15555925902901164911424664353100658.266 +-1387833358 -1216781788 oau doj cwi 1971-12-05 05:21:05.046065000 1963-07-09 11:37:51.858310000 19834443799335944706859480505138149.778 +-1307222848 215463808 ykf nyt oje 1993-11-13 13:31:13.535048000 1963-05-31 09:22:00.597388000 17055224656744056845970759938974332.797 +138130148 214808438 gbm ido sey 1948-11-03 01:29:08.546111000 1987-11-03 10:51:18.290933000 \N +1475543710 1646595280 bvq pkf xje 1947-07-28 05:22:21.526468000 1939-03-04 17:50:10.430653000 -12795745472629182285042295956733845.24 +1556154220 -1216257492 lgb aup kfq 1969-07-06 13:32:30.015451000 1939-01-24 15:34:19.169731000 -8517227576194402489607479804696353.728 +-1293591152 -1216912862 cni epk fau 1997-07-30 20:00:50.580339000 1963-06-26 10:52:34.771336000 -11286062045844684921539876062670855.636 +44019016 215070586 wrd lgr kfa 1959-10-04 11:33:04.705657000 1987-11-29 12:21:52.464881000 -19869384242659270086092018993893520.458 +124629526 214677364 hcn vqc grm 1945-03-01 19:44:48.587793000 1987-10-21 10:06:01.203959000 -15590866346224490290657202841856028.946 +1569785916 1646464206 oje qlg bvh 1936-09-08 20:03:42.453895000 1939-02-19 17:04:53.343679000 2913139175860346398791960496071183.575 +-1387702284 -1216650714 sey hsn gbm 1971-12-18 06:06:22.133039000 1963-07-22 12:23:08.945284000 15597464911604415200700730529334031.97 +-1307091774 -1217043936 doj rdx sni 1993-11-26 14:16:30.622022000 1963-06-13 10:07:17.684362000 19875982808039194996135546681371523.482 +138261222 214939512 kfq mhs gbv 1948-11-16 02:14:25.633085000 1987-11-16 11:36:35.377907000 17096763665447307135246826115207706.501 +1475674784 1646726354 fau toj cni 1947-08-10 06:07:38.613442000 1939-03-17 18:35:27.517627000 -12754206463925931995766229780500471.536 +1556285294 1646333132 pkf eyt oju 1969-07-19 14:17:47.102425000 1939-02-06 16:19:36.256705000 -8475688567491152200331413628462980.024 +-1293460078 \N grm ito cwi 1997-08-12 20:46:07.667313000 1963-07-09 11:37:51.858310000 -11244523037141434632263809886437481.932 +44150090 -1216781788 bvh pkv oje 1959-10-17 12:18:21.792631000 1987-12-12 13:07:09.551855000 -19827845233956019796815952817660146.754 +124760600 215201660 lgr aug kvq 1945-03-14 20:30:05.674767000 1987-11-03 10:51:18.290933000 -15549327337521240001381136665622655.242 +1569916990 214808438 sni 
upk xje 1936-09-21 20:48:59.540869000 \N 2954678184563596688068026672304557.279 +-1387571210 1646595280 wid lwr kfq 1971-12-31 06:51:39.220013000 1939-03-04 17:50:10.430653000 15639003920307665489976796705567405.674 +-1306960700 -1216519640 hsn vhc wrm 1993-12-09 15:01:47.708996000 1938-12-29 14:03:44.995783000 19917521816742445285411612857604897.186 +138392296 -1216912862 oju qlw kfa 1948-11-29 02:59:42.720059000 1963-06-26 10:52:34.771336000 17138302674150557424522892291441080.205 +1475805858 215070586 jey xsn grm 1947-08-23 06:52:55.700416000 1987-11-29 12:21:52.464881000 -12712667455222681706490163604267097.832 +1556416368 1646857428 toj idx sny 1969-08-01 15:03:04.189398000 1987-09-25 08:35:27.030011000 -8434149558787901911055347452229606.32 +-1293329004 1646464206 kvq mxs gbm 1997-08-25 21:31:24.754287000 1939-02-19 17:04:53.343679000 -11202984028438184342987743710204108.228 +44281164 -1216650714 fal toa sni 1959-10-30 13:03:38.879605000 1963-07-22 12:23:08.945284000 -19786306225252769507539886641426773.05 +124891674 215332734 pkv eyk oau 1945-03-27 21:15:22.761741000 1963-05-18 08:36:43.510414000 -15507788328817989712105070489389281.538 +1570048064 214939512 wrm yto cni 1936-10-04 21:34:16.627843000 1987-11-16 11:36:35.377907000 2996217193266846977344092848537930.983 +-1387440136 1646726354 bmh pbv oju 1972-01-13 07:36:56.306987000 1939-03-17 18:35:27.517627000 15680542929010915779252862881800779.378 +-1306829626 -1216388566 lwr alg bvq 1993-12-22 15:47:04.795970000 1939-01-11 14:49:02.082757000 19959060825445695574687679033838270.89 +138523370 -1216781788 sny upb oje 1948-12-12 03:44:59.807033000 1963-07-09 11:37:51.858310000 17179841682853807713798958467674453.909 +1475936932 215201660 nid cwr kvq 1947-09-05 07:38:12.787390000 1987-12-12 13:07:09.551855000 -12671128446519431417214097428033724.128 +1556547442 1646988502 xsn mhc wrd 1969-08-14 15:48:21.276372000 1987-10-08 09:20:44.116985000 -8392610550084651621779281275996232.616 +-1293197930 1646595280 oau qcw kfq 1997-09-07 22:16:41.841261000 1939-03-04 17:50:10.430653000 -11161445019734934053711677533970734.524 +44412238 -1216519640 jep xse wrm 1959-11-12 13:48:55.966579000 1938-12-29 14:03:44.995783000 -19744767216549519218263820465193399.346 +125022748 215463808 toa ido sey 1945-04-09 22:00:39.848715000 1963-05-31 09:22:00.597388000 -15466249320114739422829004313155907.834 +1570179138 215070586 bvq dxs grm 1936-10-17 22:19:33.714817000 1987-11-29 12:21:52.464881000 3037756201970097266620159024771304.687 +-1387309062 1646857428 fql tfa sny 1972-01-26 08:22:13.393961000 1987-09-25 08:35:27.030011000 15722081937714166068528929058034153.082 +-1306698552 -1216257492 pbv epk fau 1994-01-04 16:32:21.882944000 1939-01-24 15:34:19.169731000 20000599834148945863963745210071644.594 +138654444 -1216650714 wrd ytf sni 1948-12-25 04:30:16.894006000 1963-07-22 12:23:08.945284000 17221380691557058003075024643907827.613 +1476068006 215332734 rmh gbv oau 1947-09-18 08:23:29.874363000 1963-05-18 08:36:43.510414000 -12629589437816181127938031251800350.424 +1556678516 214677364 cwr qlg bvh 1969-08-27 16:33:38.363346000 1987-10-21 10:06:01.203959000 -8351071541381401332503215099762858.912 +-1293066856 1646726354 sey ugb oju 1997-09-20 23:01:58.928235000 1939-03-17 18:35:27.517627000 -11119906011031683764435611357737360.82 +44543312 -1216388566 nit cwi bvq 1959-11-25 14:34:13.053553000 1939-01-11 14:49:02.082757000 -19703228207846268928987754288960025.642 +125153822 -1217043936 xse mhs gbv 1945-04-22 22:45:56.935689000 1963-06-13 10:07:17.684362000 
-15424710311411489133552938136922534.13 +1570310212 215201660 fau hcw kvq 1936-10-30 23:04:50.801791000 1987-12-12 13:07:09.551855000 3079295210673347555896225201004678.391 +-1387177988 1646988502 jup xje wrd 1972-02-08 09:07:30.480935000 1987-10-08 09:20:44.116985000 15763620946417416357804995234267526.786 +-1306567478 1646333132 tfa ito cwi 1994-01-17 17:17:38.969918000 1939-02-06 16:19:36.256705000 20042138842852196153239811386305018.298 +138785518 -1216519640 bvh dxj wrm 1949-01-07 05:15:33.980980000 1938-12-29 14:03:44.995783000 17262919700260308292351090820141201.317 +1476199080 215463808 vql kfa sey 1947-10-01 09:08:46.961337000 1963-05-31 09:22:00.597388000 -12588050429112930838661965075566976.72 +1556809590 214808438 gbv upk xje 1969-09-09 17:18:55.450320000 1987-11-03 10:51:18.290933000 -8309532532678151043227148923529485.208 +-1292935782 1646857428 wid ykf sny 1997-10-03 23:47:16.015209000 1987-09-25 08:35:27.030011000 -11078367002328433475159545181503987.116 +44674386 -1216257492 rmx gbm fau 1959-12-08 15:19:30.140527000 1939-01-24 15:34:19.169731000 -19661689199143018639711688112726651.938 +125284896 -1216912862 cwi qlw kfa 1945-05-05 23:31:14.022663000 1963-06-26 10:52:34.771336000 -15383171302708238844276871960689160.426 +1570441286 215332734 jey lgb oau 1936-11-12 23:50:07.888765000 1963-05-18 08:36:43.510414000 3120834219376597845172291377238052.095 +-1387046914 214677364 nyt cni bvh 1972-02-21 09:52:47.567909000 1987-10-21 10:06:01.203959000 15805159955120666647081061410500900.49 +-1306436404 1646464206 xje mxs gbm 1994-01-30 18:02:56.056892000 1939-02-19 17:04:53.343679000 20083677851555446442515877562538392.002 +138916592 -1216388566 fal hcn bvq 1949-01-20 06:00:51.067954000 1939-01-11 14:49:02.082757000 17304458708963558581627156996374575.021 +1476330154 -1217043936 aup oje gbv 1947-10-14 09:54:04.048311000 1963-06-13 10:07:17.684362000 -12546511420409680549385898899333603.016 +1556940664 214939512 kfa yto cni 1969-09-22 18:04:12.537294000 1987-11-16 11:36:35.377907000 -8267993523974900753951082747296111.504 +-1292804708 1646988502 bmh doj wrd 1997-10-17 00:32:33.102183000 1987-10-08 09:20:44.116985000 -11036827993625183185883479005270613.412 +44805460 1646333132 vqc kfq cwi 1959-12-21 16:04:47.227501000 1939-02-06 16:19:36.256705000 -19620150190439768350435621936493278.234 +125415970 -1216781788 gbm upb oje 1945-05-19 00:16:31.109637000 1963-07-09 11:37:51.858310000 -15341632294004988555000805784455786.722 +1570572360 215463808 nid pkf sey 1936-11-26 00:35:24.975739000 1963-05-31 09:22:00.597388000 3162373228079848134448357553471425.799 +-1386915840 214808438 rdx grm xje 1972-03-05 10:38:04.654883000 1987-11-03 10:51:18.290933000 15846698963823916936357127586734274.194 +-1306305330 1646595280 cni qcw kfq 1994-02-12 18:48:13.143865000 1939-03-04 17:50:10.430653000 20125216860258696731791943738771765.706 +139047666 -1216257492 jep lgr fau 1949-02-02 06:46:08.154928000 1939-01-24 15:34:19.169731000 17345997717666808870903223172607948.725 +1476461228 -1216912862 eyt sni kfa 1947-10-27 10:39:21.135285000 1963-06-26 10:52:34.771336000 -12504972411706430260109832723100229.312 +1557071738 215070586 oje dxs grm 1969-10-05 18:49:29.624268000 1987-11-29 12:21:52.464881000 -8226454515271650464675016571062737.8 +-1292673634 214677364 fql hsn bvh 1997-10-30 01:17:50.189157000 1987-10-21 10:06:01.203959000 -10995288984921932896607412829037239.708 +44936534 1646464206 aug oju gbm 1960-01-03 16:50:04.314475000 1939-02-19 17:04:53.343679000 -19578611181736518061159555760259904.53 
+125547044 -1216650714 kfq ytf sni 1945-06-01 01:01:48.196611000 1963-07-22 12:23:08.945284000 -15300093285301738265724739608222413.018 +1570703434 -1217043936 rmh toj gbv 1936-12-09 01:20:42.062713000 1963-06-13 10:07:17.684362000 3203912236783098423724423729704799.503 +\N 214939512 vhc kvq cni 1972-03-18 11:23:21.741857000 1987-11-16 11:36:35.377907000 15888237972527167225633193762967647.898 +-1386784766 1646726354 grm ugb oju 1994-02-25 19:33:30.230839000 1939-03-17 18:35:27.517627000 20166755868961947021068009915005139.41 +-1306174256 1646333132 nit pkv cwi 1949-02-15 07:31:25.241902000 1939-02-06 16:19:36.256705000 17387536726370059160179289348841322.429 +139178740 -1216781788 idx wrm oje 1947-11-09 11:24:38.222259000 1963-07-09 11:37:51.858310000 -12463433403003179970833766546866855.608 +1476592302 215201660 sni hcw kvq 1969-10-18 19:34:46.711242000 1987-12-12 13:07:09.551855000 -8184915506568400175398950394829364.096 +1557202812 214808438 jup lwr xje 1997-11-12 02:03:07.276131000 1987-11-03 10:51:18.290933000 -10953749976218682607331346652803866.004 +-1292542560 1646595280 eyk sny kfq 1960-01-16 17:35:21.401449000 1939-03-04 17:50:10.430653000 -19537072173033267771883489584026530.826 +45067608 -1216519640 oju dxj wrm 1945-06-14 01:47:05.283585000 1938-12-29 14:03:44.995783000 -15258554276598487976448673431989039.314 +125678118 -1216912862 vql xsn kfa 1936-12-22 02:05:59.149687000 1963-06-26 10:52:34.771336000 3245451245486348713000489905938173.207 +1570834508 215070586 alg oau grm 1972-03-31 12:08:38.828830000 \N 15929776981230417514909259939201021.602 +-1386653692 1646857428 kvq ykf sny 1994-03-10 20:18:47.317813000 1987-11-29 12:21:52.464881000 20208294877665197310344076091238513.114 +-1306043182 1646464206 rmx toa gbm 1949-02-28 08:16:42.328876000 1987-09-25 08:35:27.030011000 17429075735073309449455355525074696.133 +139309814 -1216650714 mhc bvq sni 1947-11-22 12:09:55.309233000 1939-02-19 17:04:53.343679000 -12421894394299929681557700370633481.904 +1476723376 215332734 wrm lgb oau 1969-10-31 20:20:03.798216000 1963-07-22 12:23:08.945284000 -8143376497865149886122884218595990.392 +1557333886 214939512 nyt pbv cni 1997-11-25 02:48:24.363105000 1963-05-18 08:36:43.510414000 -10912210967515432318055280476570492.3 +-1292411486 1646726354 ido wrd oju 1960-01-29 18:20:38.488423000 1987-11-16 11:36:35.377907000 -19495533164330017482607423407793157.122 +45198682 -1216388566 sny hcn bvq 1945-06-27 02:32:22.370559000 1939-03-17 18:35:27.517627000 -15217015267895237687172607255755665.61 +125809192 -1216781788 aup cwr oje 1937-01-04 02:51:16.236661000 1939-01-11 14:49:02.082757000 3286990254189599002276556082171546.911 +1570965582 215201660 epk sey kvq 1972-04-13 12:53:55.915804000 1963-07-09 11:37:51.858310000 15971315989933667804185326115434395.306 +-1386522618 1646988502 oau doj wrd 1994-03-23 21:04:04.404787000 1987-12-12 13:07:09.551855000 20249833886368447599620142267471886.818 +-1305912108 1646595280 vqc xse kfq 1949-03-13 09:01:59.415850000 1987-10-08 09:20:44.116985000 17470614743776559738731421701308069.837 +139440888 -1216519640 qlg fau wrm 1947-12-05 12:55:12.396207000 1939-03-04 17:50:10.430653000 -12380355385596679392281634194400108.2 +1476854450 215463808 bvq \N sey 1969-11-13 21:05:20.885190000 1938-12-29 14:03:44.995783000 -8101837489161899596846818042362616.688 +1557464960 215070586 rdx pkf grm 1997-12-08 03:33:41.450079000 1963-05-31 09:22:00.597388000 -10870671958812182028779214300337118.596 +-1292280412 1646857428 mhs tfa sny 1960-02-11 19:05:55.575397000 1987-11-29 
12:21:52.464881000 -19453994155626767193331357231559783.418 +45329756 -1216257492 wrd bvh fau 1945-07-10 03:17:39.457533000 1987-09-25 08:35:27.030011000 -15175476259191987397896541079522291.906 +125940266 -1216650714 eyt lgr sni 1937-01-17 03:36:33.323635000 1939-01-24 15:34:19.169731000 3328529262892849291552622258404920.615 +1571096656 215332734 ito gbv oau 1972-04-26 13:39:13.002778000 1963-07-22 12:23:08.945284000 16012854998636918093461392291667769.01 +-1386391544 214677364 sey wid bvh 1994-04-05 21:49:21.491761000 1963-05-18 08:36:43.510414000 20291372895071697888896208443705260.522 +-1305781034 1646726354 aug hsn oju 1949-03-26 09:47:16.502824000 1987-10-21 10:06:01.203959000 17512153752479810028007487877541443.541 +139571962 -1216388566 upk cwi bvq 1947-12-18 13:40:29.483181000 1939-03-17 18:35:27.517627000 -12338816376893429103005568018166734.496 +1476985524 -1217043936 fau jey gbv 1969-11-26 21:50:37.972164000 1939-01-11 14:49:02.082757000 -8060298480458649307570751866129242.984 +1557596034 215201660 vhc toj kvq 1997-12-21 04:18:58.537053000 1963-06-13 10:07:17.684362000 -10829132950108931739503148124103744.892 +-1292149338 1646988502 qlw xje wrd 1960-02-24 19:51:12.662371000 1987-12-12 13:07:09.551855000 -19412455146923516904055291055326409.714 +45460830 1646333132 bvh fal cwi 1945-07-23 04:02:56.544507000 1987-10-08 09:20:44.116985000 -15133937250488737108620474903288918.202 +126071340 -1216519640 idx pkv wrm 1937-01-30 04:21:50.410609000 1939-02-06 16:19:36.256705000 3370068271596099580828688434638294.319 +1571227730 215463808 mxs kfa sey 1972-05-09 14:24:30.089752000 1938-12-29 14:03:44.995783000 16054394007340168382737458467901142.714 +-1386260470 214808438 wid bmh xje 1994-04-18 22:34:38.578735000 1963-05-31 09:22:00.597388000 20332911903774948178172274619938634.226 +-1305649960 1646857428 eyk lwr sny 1949-04-08 10:32:33.589798000 1987-11-03 10:51:18.290933000 17553692761183060317283554053774817.245 +139703036 -1216257492 yto gbm fau 1947-12-31 14:25:46.570155000 1987-09-25 08:35:27.030011000 -12297277368190178813729501841933360.792 +1477116598 -1216912862 jey nid kfa 1969-12-09 22:35:55.059138000 1939-01-24 15:34:19.169731000 -8018759471755399018294685689895869.28 +1557727108 215332734 alg xsn oau 1998-01-03 05:04:15.624027000 1963-06-26 10:52:34.771336000 -10787593941405681450227081947870371.188 +-1292018264 214677364 upb cni bvh 1960-03-08 20:36:29.749345000 1963-05-18 08:36:43.510414000 -19370916138220266614779224879093036.01 +45591904 1646464206 fal jep gbm 1945-08-05 04:48:13.631481000 1987-10-21 10:06:01.203959000 -15092398241785486819344408727055544.498 +126202414 -1216388566 \N toa bvq 1937-02-12 05:07:07.497583000 1939-02-19 17:04:53.343679000 3411607280299349870104754610871668.023 +1571358804 -1217043936 mhc oje gbv 1972-05-22 15:09:47.176726000 1939-01-11 14:49:02.082757000 16095933016043418672013524644134516.418 +-1386129396 214939512 qcw fql cni 1994-05-01 23:19:55.665709000 1963-06-13 10:07:17.684362000 20374450912478198467448340796172007.93 +-1305518886 1646988502 bmh pbv wrd 1949-04-21 11:17:50.676772000 1987-11-16 11:36:35.377907000 17595231769886310606559620230008190.949 +139834110 1646333132 ido kfq cwi 1948-01-13 15:11:03.657129000 1987-10-08 09:20:44.116985000 -12255738359486928524453435665699987.088 +1477247672 -1216781788 dxs rmh oje 1969-12-22 23:21:12.146112000 1939-02-06 16:19:36.256705000 -7977220463052148729018619513662495.576 +1557858182 215463808 nid cwr sey 1998-01-16 05:49:32.711001000 1963-07-09 11:37:51.858310000 
-10746054932702431160951015771636997.484 +-1291887190 214808438 epk grm xje 1960-03-21 21:21:46.836319000 1963-05-31 09:22:00.597388000 -19329377129517016325503158702859662.306 +45722978 1646595280 ytf nit kfq 1945-08-18 05:33:30.718455000 1987-11-03 10:51:18.290933000 -15050859233082236530068342550822170.794 +126333488 -1216257492 jep xse fau 1937-02-25 05:52:24.584556000 1939-03-04 17:50:10.430653000 3453146289002600159380820787105041.727 +1571489878 -1216912862 qlg sni kfa 1972-06-04 15:55:04.263700000 1939-01-24 15:34:19.169731000 16137472024746668961289590820367890.122 +-1385998322 215070586 ugb jup grm 1994-05-15 00:05:12.752683000 1963-06-26 10:52:34.771336000 20415989921181448756724406972405381.634 +-1305387812 214677364 fql tfa bvh 1949-05-04 12:03:07.763746000 1987-11-29 12:21:52.464881000 17636770778589560895835686406241564.653 +139965184 1646464206 mhs oju gbm 1948-01-26 15:56:20.744103000 1987-10-21 10:06:01.203959000 -12214199350783678235177369489466613.384 +1477378746 -1216650714 hcw vql sni 1933-06-24 00:08:04.626239000 1939-02-19 17:04:53.343679000 -7935681454348898439742553337429121.872 +1557989256 -1217043936 rmh gbv gbv 1998-01-29 06:34:49.797975000 1963-07-22 12:23:08.945284000 -10704515923999180871674949595403623.78 +-1291756116 214939512 ito kvq cni 1960-04-03 22:07:03.923293000 1963-06-13 10:07:17.684362000 -19287838120813766036227092526626288.602 +45854052 1646726354 dxj rmx oju 1945-08-31 06:18:47.805429000 1987-11-16 11:36:35.377907000 -15009320224378986240792276374588797.09 +126464562 1646333132 nit cwi cwi 1937-03-10 06:37:41.671530000 1939-03-17 18:35:27.517627000 3494685297705850448656886963338415.431 +1571620952 -1216781788 upk wrm oje 1972-06-17 16:40:21.350674000 1939-02-06 16:19:36.256705000 16179011033449919250565656996601263.826 +-1385867248 215201660 ykf nyt kvq 1994-05-28 00:50:29.839657000 1963-07-09 11:37:51.858310000 20457528929884699046000473148638755.338 +-1305256738 214808438 jup xje xje 1949-05-17 12:48:24.850720000 1987-12-12 13:07:09.551855000 17678309787292811185111752582474938.357 +140096258 1646595280 qlw sny kfq 1948-02-08 16:41:37.831077000 1987-11-03 10:51:18.290933000 -12172660342080427945901303313233239.68 +1477509820 -1216519640 lgb aup wrm 1933-07-07 00:53:21.713213000 1939-03-04 17:50:10.430653000 -7894142445645648150466487161195748.168 +1558120330 -1216912862 vql kfa kfa 1998-02-11 07:20:06.884949000 1938-12-29 14:03:44.995783000 -10662976915295930582398883419170250.076 +-1291625042 215070586 mxs oau grm 1960-04-16 22:52:21.010267000 1963-06-26 10:52:34.771336000 -19246299112110515746951026350392914.898 +45985126 1646857428 hcn vqc sny 1945-09-13 07:04:04.892403000 1987-11-29 12:21:52.464881000 -14967781215675735951516210198355423.386 +126595636 1646464206 rmx gbm gbm 1937-03-23 07:22:58.758504000 1987-09-25 08:35:27.030011000 3536224306409100737932953139571789.135 +1571752026 -1216650714 yto bvq sni 1972-06-30 17:25:38.437648000 1939-02-19 17:04:53.343679000 16220550042153169539841723172834637.53 +-1385736174 215332734 doj rdx oau 1994-06-10 01:35:46.926631000 1963-07-22 12:23:08.945284000 20499067938587949335276539324872129.042 +-1305125664 214939512 nyt cni cni 1949-05-30 13:33:41.937694000 1963-05-18 08:36:43.510414000 17719848795996061474387818758708312.061 +140227332 1646726354 upb wrd oju 1948-02-21 17:26:54.918051000 1987-11-16 11:36:35.377907000 -12131121333377177656625237136999865.976 +1477640894 -1216388566 pkf eyt bvq 1933-07-20 01:38:38.800187000 1939-03-17 18:35:27.517627000 -7852603436942397861190420984962374.464 
+1558251404 -1216781788 aup oje oje 1998-02-24 08:05:23.971923000 1939-01-11 14:49:02.082757000 -10621437906592680293122817242936876.372 +-1291493968 215201660 qcw sey kvq 1960-04-29 23:37:38.097241000 1963-07-09 11:37:51.858310000 -19204760103407265457674960174159541.194 +46116200 1646988502 lgr aug wrd 1945-09-26 07:49:21.979376000 1987-12-12 13:07:09.551855000 -14926242206972485662240144022122049.682 +126726710 1646595280 vqc kfq kfq 1937-04-05 08:08:15.845478000 1987-10-08 09:20:44.116985000 3577763315112351027209019315805162.839 +1571883100 -1216519640 dxs fau wrm 1972-07-13 18:10:55.524622000 1939-03-04 17:50:10.430653000 16262089050856419829117789349068011.234 +-1385605100 215463808 hsn vhc sey 1994-06-23 02:21:04.013605000 1938-12-29 14:03:44.995783000 20540606947291199624552605501105502.746 +-1304994590 215070586 rdx grm grm 1949-06-12 14:18:59.024668000 1963-05-31 09:22:00.597388000 17761387804699311763663884934941685.765 +140358406 1646857428 ytf bvh sny 1948-03-05 18:12:12.005025000 1987-11-29 12:21:52.464881000 -12089582324673927367349170960766492.272 +1477771968 -1216257492 toj idx fau 1933-08-02 02:23:55.887161000 1987-09-25 08:35:27.030011000 -7811064428239147571914354808729000.76 +1558382478 -1216650714 eyt sni sni 1998-03-09 08:50:41.058896000 1939-01-24 15:34:19.169731000 -10579898897889430003846751066703502.668 +-1291362894 215332734 ugb wid oau 1960-05-13 00:22:55.184214000 1963-07-22 12:23:08.945284000 -19163221094704015168398893997926167.49 +46247274 214677364 pkv eyk bvh 1945-10-09 08:34:39.066350000 1963-05-18 08:36:43.510414000 -14884703198269235372964077845888675.978 +126857784 1646726354 aug oju oju 1937-04-18 08:53:32.932452000 1987-10-21 10:06:01.203959000 3619302323815601316485085492038536.543 +1572014174 -1216388566 hcw jey bvq 1972-07-26 18:56:12.611596000 1939-03-17 18:35:27.517627000 16303628059559670118393855525301384.938 +-1385474026 -1217043936 lwr alg gbv 1994-07-06 03:06:21.100579000 1939-01-11 14:49:02.082757000 20582145955994449913828671677338876.45 +-1304863516 215201660 vhc kvq kvq 1949-06-25 15:04:16.111642000 1963-06-13 10:07:17.684362000 17802926813402562052939951111175059.469 +140489480 1646988502 dxj fal wrd 1948-03-18 18:57:29.091999000 1987-12-12 13:07:09.551855000 -12048043315970677078073104784533118.568 +1477903042 1646333132 xsn mhc cwi 1933-08-15 03:09:12.974135000 1987-10-08 09:20:44.116985000 -7769525419535897282638288632495627.056 +1558513552 -1216519640 idx wrm wrm 1998-03-22 09:35:58.145870000 1939-02-06 16:19:36.256705000 -10538359889186179714570684890470128.964 +-1291231820 215463808 ykf bmh sey 1960-05-26 01:08:12.271188000 1938-12-29 14:03:44.995783000 -19121682086000764879122827821692793.786 +46378348 214808438 toa ido xje 1945-10-22 09:19:56.153324000 1963-05-31 09:22:00.597388000 -14843164189565985083688011669655302.274 +126988858 1646857428 eyk sny sny 1937-05-01 09:38:50.019426000 1987-11-03 10:51:18.290933000 3660841332518851605761151668271910.247 +1572145248 -1216257492 lgb nid fau 1972-08-08 19:41:29.698570000 1987-09-25 08:35:27.030011000 16345167068262920407669921701534758.642 +-1385342952 -1216912862 pbv epk kfa 1994-07-19 03:51:38.187553000 1939-01-24 15:34:19.169731000 20623684964697700203104737853572250.154 +-1304732442 215332734 alg oau oau 1949-07-08 15:49:33.198616000 1963-06-26 10:52:34.771336000 17844465822105812342216017287408433.173 +140620554 214677364 hcn jep bvh 1948-03-31 19:42:46.178973000 1963-05-18 08:36:43.510414000 -12006504307267426788797038608299744.864 +1478034116 1646464206 cwr qlg gbm 
1933-08-28 03:54:30.061109000 1987-10-21 10:06:01.203959000 -7727986410832646993362222456262253.352 +1558644626 -1216388566 mhc bvq bvq 1998-04-04 10:21:15.232844000 1939-02-19 17:04:53.343679000 -10496820880482929425294618714236755.26 +-1291100746 -1217043936 doj fql gbv 1960-06-08 01:53:29.358162000 1939-01-11 14:49:02.082757000 -19080143077297514589846761645459420.082 +46509422 214939512 xse mhs cni 1945-11-04 10:05:13.240298000 1963-06-13 10:07:17.684362000 -14801625180862734794411945493421928.57 +127119932 1646988502 ido wrd wrd 1937-05-14 10:24:07.106400000 1987-11-16 11:36:35.377907000 3702380341222101895037217844505283.951 +1572276322 1646333132 pkf rmh cwi 1972-08-21 20:26:46.785544000 1987-10-08 09:20:44.116985000 16386706076966170696945987877768132.346 +-1385211878 -1216781788 tfa ito oje 1994-08-01 04:36:55.274527000 1939-02-06 16:19:36.256705000 20665223973400950492380804029805623.858 +-1304601368 215463808 epk sey sey 1949-07-21 16:34:50.285590000 1963-07-09 11:37:51.858310000 17886004830809062631492083463641806.877 +140751628 214808438 lgr nit xje 1948-04-13 20:28:03.265947000 1963-05-31 09:22:00.597388000 -11964965298564176499520972432066371.16 +1478165190 1646595280 gbv upk kfq 1933-09-10 04:39:47.148083000 1987-11-03 10:51:18.290933000 -7686447402129396704086156280028879.648 +1558775700 -1216257492 qlg fau fau 1998-04-17 11:06:32.319818000 1939-03-04 17:50:10.430653000 -10455281871779679136018552538003381.556 +-1290969672 -1216912862 hsn jup kfa 1960-06-21 02:38:46.445136000 1939-01-24 15:34:19.169731000 -19038604068594264300570695469226046.378 +46640496 215070586 cwi qlw grm 1945-11-17 10:50:30.327272000 1963-06-26 10:52:34.771336000 -14760086172159484505135879317188554.866 +127251006 214677364 mhs bvh bvh 1937-05-27 11:09:24.193374000 1987-11-29 12:21:52.464881000 3743919349925352184313284020738657.655 +1572407396 1646464206 toj vql gbm 1972-09-03 21:12:03.872518000 1987-10-21 10:06:01.203959000 16428245085669420986222054054001506.05 +-1385080804 -1216650714 xje mxs sni 1994-08-14 05:22:12.361501000 1939-02-19 17:04:53.343679000 20706762982104200781656870206038997.562 +-1304470294 -1217043936 ito wid \N 1949-08-03 17:20:07.372564000 1963-07-22 12:23:08.945284000 17927543839512312920768149639875180.581 +140882702 214939512 pkv rmx gbv 1948-04-26 21:13:20.352921000 1963-06-13 10:07:17.684362000 -11923426289860926210244906255832997.456 +1478296264 1646726354 kfa yto cni 1933-09-23 05:25:04.235057000 1987-11-16 11:36:35.377907000 -7644908393426146414810090103795505.944 +1558906774 1646333132 upk jey oju 1998-04-30 11:51:49.406792000 1939-03-17 18:35:27.517627000 -10413742863076428846742486361770007.852 +-1290838598 -1216781788 lwr nyt cwi 1960-07-04 03:24:03.532110000 1939-02-06 16:19:36.256705000 -18997065059891014011294629292992672.674 +46771570 215201660 gbm upb oje 1945-11-30 11:35:47.414246000 1963-07-09 11:37:51.858310000 -14718547163456234215859813140955181.162 +127382080 214808438 qlw fal kvq 1937-06-09 11:54:41.280348000 1987-12-12 13:07:09.551855000 3785458358628602473589350196972031.359 +1572538470 1646595280 xsn aup xje 1972-09-16 21:57:20.959492000 1987-11-03 10:51:18.290933000 16469784094372671275498120230234879.754 +-1384949730 -1216519640 cni qcw kfq 1994-08-27 06:07:29.448475000 1939-03-04 17:50:10.430653000 20748301990807451070932936382272371.266 +-1304339220 -1216912862 mxs bmh wrm 1949-08-16 18:05:24.459538000 1938-12-29 14:03:44.995783000 17969082848215563210044215816108554.285 +141013776 215070586 toa vqc kfa 1948-05-09 21:58:37.439895000 1963-06-26 
10:52:34.771336000 -11881887281157675920968840079599623.752 +1478427338 1646857428 oje dxs grm 1933-10-06 06:10:21.322031000 1987-11-29 12:21:52.464881000 -7603369384722896125534023927562132.24 +1559037848 1646464206 yto nid sny 1998-05-13 12:37:06.493766000 1987-09-25 08:35:27.030011000 -10372203854373178557466420185536634.148 +-1290707524 -1216650714 pbv rdx gbm 1960-07-17 04:09:20.619084000 1939-02-19 17:04:53.343679000 -18955526051187763722018563116759298.97 +46902644 215332734 kfq ytf sni 1945-12-13 12:21:04.501220000 1963-07-22 12:23:08.945284000 -14677008154752983926583746964721807.458 +127513154 214939512 upb jep oau 1937-06-22 12:39:58.367322000 1963-05-18 08:36:43.510414000 3826997367331852762865416373205405.063 +1572669544 1646726354 cwr eyt cni 1972-09-29 22:42:38.046466000 1987-11-16 11:36:35.377907000 16511323103075921564774186406468253.458 +-1384818656 -1216388566 grm ugb oju 1994-09-09 06:52:46.535449000 1939-03-17 18:35:27.517627000 20789840999510701360209002558505744.97 +-1304208146 -1216781788 qcw fql \N 1949-08-29 18:50:41.546512000 1939-01-11 14:49:02.082757000 18010621856918813499320281992341927.989 +141144850 215201660 xse aug bvq 1948-05-22 22:43:54.526869000 1963-07-09 11:37:51.858310000 -11840348272454425631692773903366250.048 +1478558412 1646988502 sni hcw oje 1933-10-19 06:55:38.409005000 1987-12-12 13:07:09.551855000 -7561830376019645836257957751328758.536 +1559168922 1646595280 dxs rmh kvq 1998-05-26 13:22:23.580740000 1987-10-08 09:20:44.116985000 -10330664845669928268190354009303260.444 +-1290576450 -1216519640 tfa vhc wrd 1960-07-30 04:54:37.706058000 1939-03-04 17:50:10.430653000 \N +47033718 215463808 oju dxj kfq 1945-12-26 13:06:21.588194000 1938-12-29 14:03:44.995783000 -18913987042484513432742496940525925.266 +127644228 215070586 ytf nit wrm 1937-07-05 13:25:15.454296000 1963-05-31 09:22:00.597388000 -14635469146049733637307680788488433.754 +1572800618 1646857428 gbv idx sey 1972-10-12 23:27:55.133440000 1987-11-29 12:21:52.464881000 3868536376035103052141482549438778.767 +-1384687582 -1216257492 kvq ykf grm 1994-09-22 07:38:03.622423000 1987-09-25 08:35:27.030011000 16552862111779171854050252582701627.162 +-1304077072 -1216650714 ugb jup sny 1949-09-11 19:35:58.633486000 1939-01-24 15:34:19.169731000 20831380008213951649485068734739118.674 +141275924 215332734 cwi eyk fau 1948-06-04 23:29:11.613843000 1963-07-22 12:23:08.945284000 18052160865622063788596348168575301.693 +1478689486 214677364 wrm lgb sni 1933-11-01 07:40:55.495979000 1963-05-18 08:36:43.510414000 -11798809263751175342416707727132876.344 +1559299996 1646726354 hcw vql oau 1998-06-08 14:07:40.667714000 1987-10-21 10:06:01.203959000 -7520291367316395546981891575095384.832 +-1290445376 -1216388566 xje alg bvh 1960-08-12 05:39:54.793032000 1939-03-17 18:35:27.517627000 -10289125836966677978914287833069886.74 +47164792 -1217043936 sny hcn oju 1946-01-08 13:51:38.675168000 1939-01-11 14:49:02.082757000 -18872448033781263143466430764292551.562 +127775302 215201660 dxj rmx bvq 1937-07-18 14:10:32.541270000 1963-06-13 10:07:17.684362000 -14593930137346483348031614612255060.05 +1572931692 1646988502 kfa mhc gbv 1972-10-26 00:13:12.220414000 1987-12-12 13:07:09.551855000 3910075384738353341417548725672152.471 +-1384556508 1646333132 oau doj kvq 1994-10-05 08:23:20.709397000 1987-10-08 09:20:44.116985000 16594401120482422143326318758935000.866 +-1303945998 -1216519640 ykf nyt wrd 1949-09-24 20:21:15.720460000 1939-02-06 16:19:36.256705000 20872919016917201938761134910972492.378 +141406998 215463808 
gbm ido cwi 1948-06-18 00:14:28.700817000 1938-12-29 14:03:44.995783000 18093699874325314077872414344808675.397 +1478820560 214808438 bvq pkf wrm 1933-11-14 08:26:12.582953000 1963-05-31 09:22:00.597388000 -11757270255047925053140641550899502.64 +1559431070 1646857428 lgb aup sey 1998-06-21 14:52:57.754688000 1987-11-03 10:51:18.290933000 -7478752358613145257705825398862011.128 +-1290314302 -1216257492 cni epk xje 1960-08-25 06:25:11.880006000 1987-09-25 08:35:27.030011000 -10247586828263427689638221656836513.036 +47295866 -1216912862 wrd lgr sny 1946-01-21 14:36:55.762142000 1939-01-24 15:34:19.169731000 -18830909025078012854190364588059177.858 +127906376 215332734 hcn vqc fau 1937-07-31 14:55:49.628244000 1963-06-26 10:52:34.771336000 -14552391128643233058755548436021686.346 +1573062766 214677364 oje qlg kfa 1972-11-08 00:58:29.307388000 1963-05-18 08:36:43.510414000 3951614393441603630693614901905526.175 +-1384425434 1646464206 sey hsn oau 1994-10-18 09:08:37.796371000 1987-10-21 10:06:01.203959000 16635940129185672432602384935168374.57 +-1303814924 -1216388566 doj rdx bvh 1949-10-07 21:06:32.807434000 1939-02-19 17:04:53.343679000 20914458025620452228037201087205866.082 +141538072 -1217043936 kfq mhs gbm 1948-07-01 00:59:45.787790000 1939-01-11 14:49:02.082757000 18135238883028564367148480521042049.101 +1478951634 214939512 fau toj bvq 1933-11-27 09:11:29.669926000 1963-06-13 10:07:17.684362000 -11715731246344674763864575374666128.936 +1559562144 1646988502 pkf eyt gbv 1998-07-04 15:38:14.841662000 1987-11-16 11:36:35.377907000 -7437213349909894968429759222628637.424 +-1290183228 1646333132 grm ito cni 1960-09-07 07:10:28.966980000 \N -10206047819560177400362155480603139.332 +47426940 -1216781788 bvh pkv wrd 1946-02-03 15:22:12.849116000 1987-10-08 09:20:44.116985000 -18789370016374762564914298411825804.154 +128037450 215463808 lgr aug cwi 1937-08-13 15:41:06.715218000 1939-02-06 16:19:36.256705000 -14510852119939982769479482259788312.642 +1573193840 214808438 sni upk oje 1972-11-21 01:43:46.394362000 1963-07-09 11:37:51.858310000 3993153402144853919969681078138899.879 +-1384294360 1646595280 wid lwr sey 1994-10-31 09:53:54.883345000 1963-05-31 09:22:00.597388000 16677479137888922721878451111401748.274 +-1303683850 -1216257492 hsn vhc xje 1949-10-20 21:51:49.894407000 1987-11-03 10:51:18.290933000 20955997034323702517313267263439239.786 +141669146 -1216912862 oju qlw kfq 1948-07-14 01:45:02.874764000 1939-03-04 17:50:10.430653000 18176777891731814656424546697275422.805 +1479082708 215070586 jey xsn fau 1933-12-10 09:56:46.756900000 1939-01-24 15:34:19.169731000 -11674192237641424474588509198432755.232 +1559693218 214677364 toj idx kfa 1998-07-17 16:23:31.928636000 1963-06-26 10:52:34.771336000 -7395674341206644679153693046395263.72 +-1290052154 1646464206 kvq mxs grm 1960-09-20 07:55:46.053954000 1987-11-29 12:21:52.464881000 -10164508810856927111086089304369765.628 +47558014 -1216650714 fal toa bvh 1946-02-16 16:07:29.936090000 1987-10-21 10:06:01.203959000 -18747831007671512275638232235592430.45 +128168524 -1217043936 pkv eyk gbm 1937-08-26 16:26:23.802192000 1939-02-19 17:04:53.343679000 -14469313111236732480203416083554938.938 +1573324914 214939512 wrm yto sni 1972-12-04 02:29:03.481336000 1963-07-22 12:23:08.945284000 4034692410848104209245747254372273.583 +-1384163286 1646726354 bmh pbv gbv 1994-11-13 10:39:11.970319000 1963-06-13 10:07:17.684362000 16719018146592173011154517287635121.978 +-1303552776 1646333132 lwr alg cni 1949-11-02 22:37:06.981381000 1987-11-16 
11:36:35.377907000 20997536043026952806589333439672613.49 +141800220 -1216781788 sny upb oju 1948-07-27 02:30:19.961738000 1939-03-17 18:35:27.517627000 18218316900435064945700612873508796.509 +1479213782 215201660 nid cwr cwi 1933-12-23 10:42:03.843874000 1939-02-06 16:19:36.256705000 -11632653228938174185312443022199381.528 +1559824292 214808438 xsn mhc oje 1998-07-30 17:08:49.015610000 1963-07-09 11:37:51.858310000 -7354135332503394389877626870161890.016 +-1289921080 1646595280 oau qcw kvq 1960-10-03 08:41:03.140928000 1987-12-12 13:07:09.551855000 -10122969802153676821810023128136391.924 +47689088 -1216519640 jep xse xje 1946-03-01 16:52:47.023064000 1987-11-03 10:51:18.290933000 -18706291998968261986362166059359056.746 +128299598 -1216912862 toa ido kfq 1937-09-08 17:11:40.889166000 1939-03-04 17:50:10.430653000 -14427774102533482190927349907321565.234 +1573455988 215070586 bvq dxs wrm 1972-12-17 03:14:20.568310000 1938-12-29 14:03:44.995783000 4076231419551354498521813430605647.287 +-1384032212 1646857428 fql tfa kfa 1994-11-26 11:24:29.057293000 1963-06-26 10:52:34.771336000 16760557155295423300430583463868495.682 +-1303421702 1646464206 pbv epk grm 1949-11-15 23:22:24.068355000 1987-11-29 12:21:52.464881000 21039075051730203095865399615905987.194 +141931294 -1216650714 wrd ytf sny 1948-08-09 03:15:37.048712000 1987-09-25 08:35:27.030011000 18259855909138315234976679049742170.213 +1479344856 215332734 rmh gbv gbm 1934-01-05 11:27:20.930848000 1939-02-19 17:04:53.343679000 -11591114220234923896036376845966007.824 +1559955366 214939512 cwr qlg sni 1998-08-12 17:54:06.102584000 1963-07-22 12:23:08.945284000 -7312596323800144100601560693928516.312 +-1289790006 1646726354 sey ugb oau 1960-10-16 09:26:20.227902000 1963-05-18 08:36:43.510414000 -10081430793450426532533956951903018.22 +47820162 -1216388566 nit cwi cni 1946-03-14 17:38:04.110038000 1987-11-16 11:36:35.377907000 -18664752990265011697086099883125683.042 +128430672 -1216781788 xse mhs oju 1937-09-21 17:56:57.976140000 1939-03-17 18:35:27.517627000 -14386235093830231901651283731088191.53 +1573587062 215201660 fau hcw bvq 1972-12-30 03:59:37.655284000 1939-01-11 14:49:02.082757000 4117770428254604787797879606839020.991 +-1383901138 1646988502 jup xje oje 1994-12-09 12:09:46.144266000 1963-07-09 11:37:51.858310000 16802096163998673589706649640101869.386 +-1303290628 1646595280 tfa ito kvq 1949-11-29 00:07:41.155329000 1987-12-12 13:07:09.551855000 21080614060433453385141465792139360.898 +142062368 \N bvh dxj wrd 1948-08-22 04:00:54.135686000 1987-10-08 09:20:44.116985000 18301394917841565524252745225975543.917 +1479475930 -1216519640 vql kfa kfq 1934-01-18 12:12:38.017822000 1939-03-04 17:50:10.430653000 -11549575211531673606760310669732634.12 +1560086440 215463808 gbv upk wrm 1998-08-25 18:39:23.189558000 1938-12-29 14:03:44.995783000 -7271057315096893811325494517695142.608 +-1289658932 215070586 wid ykf sey 1960-10-29 10:11:37.314876000 1963-05-31 09:22:00.597388000 -10039891784747176243257890775669644.516 +47951236 1646857428 rmx gbm grm 1946-03-27 18:23:21.197012000 1987-11-29 12:21:52.464881000 -18623213981561761407810033706892309.338 +128561746 -1216257492 cwi qlw sny 1937-10-04 18:42:15.063114000 1987-09-25 08:35:27.030011000 -14344696085126981612375217554854817.826 +1573718136 -1216650714 jey lgb fau 1973-01-12 04:44:54.742258000 1939-01-24 15:34:19.169731000 4159309436957855077073945783072394.695 +-1383770064 215332734 nyt cni sni 1994-12-22 12:55:03.231240000 1963-07-22 12:23:08.945284000 
16843635172701923878982715816335243.09 +-1303159554 214677364 xje mxs oau 1949-12-12 00:52:58.242303000 1963-05-18 08:36:43.510414000 21122153069136703674417531968372734.602 +142193442 1646726354 fal hcn bvh 1948-09-04 04:46:11.222660000 1987-10-21 10:06:01.203959000 18342933926544815813528811402208917.621 +1479607004 -1216388566 aup oje oju 1934-01-31 12:57:55.104796000 1939-03-17 18:35:27.517627000 -11508036202828423317484244493499260.416 +1560217514 -1217043936 kfa yto bvq 1998-09-07 19:24:40.276532000 1939-01-11 14:49:02.082757000 -7229518306393643522049428341461768.904 +-1289527858 215201660 bmh doj gbv 1960-11-11 10:56:54.401850000 1963-06-13 10:07:17.684362000 -9998352776043925953981824599436270.812 +48082310 1646988502 vqc kfq kvq 1946-04-09 19:08:38.283986000 1987-12-12 13:07:09.551855000 -18581674972858511118533967530658935.634 +128692820 1646333132 gbm upb wrd 1937-10-17 19:27:32.150088000 1987-10-08 09:20:44.116985000 -14303157076423731323099151378621444.122 +1573849210 -1216519640 nid pkf cwi 1973-01-25 05:30:11.829231000 1939-02-06 16:19:36.256705000 4200848445661105366350011959305768.399 +-1383638990 215463808 rdx grm wrm 1995-01-04 13:40:20.318214000 1938-12-29 14:03:44.995783000 16885174181405174168258781992568616.794 +-1303028480 214808438 cni qcw sey 1949-12-25 01:38:15.329277000 1963-05-31 09:22:00.597388000 21163692077839953963693598144606108.306 +142324516 1646857428 jep lgr xje 1948-09-17 05:31:28.309634000 1987-11-03 10:51:18.290933000 18384472935248066102804877578442291.325 +1479738078 -1216257492 eyt sni sny 1934-02-13 13:43:12.191770000 1987-09-25 08:35:27.030011000 -11466497194125173028208178317265886.712 +1560348588 -1216912862 oje dxs fau 1998-09-20 20:09:57.363506000 1939-01-24 15:34:19.169731000 -7187979297690393232773362165228395.2 +-1289396784 215332734 fql hsn kfa 1960-11-24 11:42:11.488824000 1963-06-26 10:52:34.771336000 -9956813767340675664705758423202897.108 +48213384 214677364 aug oju oau 1946-04-22 19:53:55.370960000 1963-05-18 08:36:43.510414000 -18540135964155260829257901354425561.93 +128823894 1646464206 kfq ytf bvh 1937-10-30 20:12:49.237062000 1987-10-21 10:06:01.203959000 -14261618067720481033823085202388070.418 +1573980284 -1216388566 rmh toj gbm 1973-02-07 06:15:28.916205000 1939-02-19 17:04:53.343679000 4242387454364355655626078135539142.103 +-1383507916 -1217043936 vhc kvq bvq 1995-01-17 14:25:37.405188000 1939-01-11 14:49:02.082757000 16926713190108424457534848168801990.498 +-1302897406 214939512 grm ugb gbv 1950-01-07 02:23:32.416251000 1963-06-13 10:07:17.684362000 21205231086543204252969664320839482.01 +142455590 1646988502 nit pkv cni 1948-09-30 06:16:45.396608000 1987-11-16 11:36:35.377907000 18426011943951316392080943754675665.029 +1479869152 1646333132 idx wrm wrd 1934-02-26 14:28:29.278744000 1987-10-08 09:20:44.116985000 -11424958185421922738932112141032513.008 +1560479662 -1216781788 sni hcw cwi 1998-10-03 20:55:14.450480000 1939-02-06 16:19:36.256705000 -7146440288987142943497295988995021.496 +-1289265710 215463808 jup lwr oje 1960-12-07 12:27:28.575798000 1963-07-09 11:37:51.858310000 -9915274758637425375429692246969523.404 +48344458 214808438 eyk sny sey 1946-05-05 20:39:12.457934000 1963-05-31 09:22:00.597388000 -18498596955452010539981835178192188.226 +128954968 1646595280 \N dxj xje 1937-11-12 20:58:06.324036000 1987-11-03 10:51:18.290933000 -14220079059017230744547019026154696.714 +1574111358 -1216257492 oju xsn kfq 1973-02-20 07:00:46.003179000 1939-03-04 17:50:10.430653000 4283926463067605944902144311772515.807 
+-1383376842 -1216912862 vql oau fau 1995-01-30 15:10:54.492162000 1939-01-24 15:34:19.169731000 16968252198811674746810914345035364.202 +-1302766332 215070586 alg ykf kfa 1950-01-20 03:08:49.503225000 1963-06-26 10:52:34.771336000 21246770095246454542245730497072855.714 +142586664 214677364 kvq toa grm 1948-10-13 07:02:02.483582000 1987-11-29 12:21:52.464881000 18467550952654566681357009930909038.733 +1480000226 1646464206 rmx bvq bvh 1934-03-11 15:13:46.365718000 1987-10-21 10:06:01.203959000 -11383419176718672449656045964799139.304 +1560610736 -1216650714 mhc lgb gbm 1998-10-16 21:40:31.537454000 1939-02-19 17:04:53.343679000 -7104901280283892654221229812761647.792 +-1289134636 -1217043936 wrm pbv sni 1960-12-20 13:12:45.662772000 1963-07-22 12:23:08.945284000 -9873735749934175086153626070736149.7 +48475532 214939512 nyt wrd gbv 1946-05-18 21:24:29.544908000 1963-06-13 10:07:17.684362000 -18457057946748760250705769001958814.522 +129086042 1646726354 ido hcn cni 1937-11-25 21:43:23.411010000 1987-11-16 11:36:35.377907000 -14178540050313980455270952849921323.01 +1574242432 1646333132 sny cwr oju 1973-03-05 07:46:03.090153000 1939-03-17 18:35:27.517627000 4325465471770856234178210488005889.511 +-1383245768 -1216781788 aup sey cwi 1995-02-12 15:56:11.579136000 1939-02-06 16:19:36.256705000 17009791207514925036086980521268737.906 +-1302635258 215201660 epk doj oje 1950-02-02 03:54:06.590199000 1963-07-09 11:37:51.858310000 20661171391050865060883708820716.202 +142717738 \N oau xse kvq 1948-10-26 07:47:19.570556000 1987-12-12 13:07:09.551855000 18509089961357816970633076107142412.437 +1480131300 214808438 vqc fau xje 1934-03-24 15:59:03.452692000 1987-11-03 10:51:18.290933000 -11341880168015422160379979788565765.6 +1560741810 1646595280 qlg pkf kfq 1998-10-29 22:25:48.624428000 1939-03-04 17:50:10.430653000 -7063362271580642364945163636528274.088 +-1289003562 -1216519640 bvq tfa wrm 1961-01-02 13:58:02.749746000 1938-12-29 14:03:44.995783000 -9832196741230924796877559894502775.996 +48606606 -1216912862 rdx bvh kfa 1946-05-31 22:09:46.631882000 1963-06-26 10:52:34.771336000 -18415518938045509961429702825725440.818 +129217116 215070586 mhs lgr grm 1937-12-08 22:28:40.497983000 1987-11-29 12:21:52.464881000 -14137001041610730165994886673687949.306 +1574373506 1646857428 wrd gbv sny 1973-03-18 08:31:20.177127000 1987-09-25 08:35:27.030011000 4367004480474106523454276664239263.215 +-1383114694 1646464206 eyt wid gbm 1995-02-25 16:41:28.666110000 1939-02-19 17:04:53.343679000 17051330216218175325363046697502111.61 +-1302504184 -1216650714 ito hsn sni 1950-02-15 04:39:23.677173000 1963-07-22 12:23:08.945284000 62200180094301154336949885054089.906 +142848812 215332734 sey cwi oau 1948-11-08 08:32:36.657530000 1963-05-18 08:36:43.510414000 18550628970061067259909142283375786.141 +1480262374 214939512 aug jey cni \N 1987-11-16 11:36:35.377907000 -11300341159312171871103913612332391.896 +1560872884 1646726354 upk toj oju 1934-04-06 16:44:20.539666000 1939-03-17 18:35:27.517627000 -7021823262877392075669097460294900.384 +-1288872488 -1216388566 fau xje bvq 1998-11-11 23:11:05.711402000 1939-01-11 14:49:02.082757000 -9790657732527674507601493718269402.292 +48737680 -1216781788 vhc fal oje 1961-01-15 14:43:19.836720000 1963-07-09 11:37:51.858310000 -18373979929342259672153636649492067.114 +129348190 215201660 qlw pkv kvq 1946-06-13 22:55:03.718856000 1987-12-12 13:07:09.551855000 -14095462032907479876718820497454575.602 +1574504580 1646988502 bvh kfa wrd 1937-12-21 23:13:57.584957000 1987-10-08 
09:20:44.116985000 4408543489177356812730342840472636.919 +-1382983620 1646595280 idx bmh kfq 1973-03-31 09:16:37.264101000 1939-03-04 17:50:10.430653000 17092869224921425614639112873735485.314 +-1302373110 -1216519640 mxs lwr wrm 1995-03-10 17:26:45.753084000 1938-12-29 14:03:44.995783000 103739188797551443613016061287463.61 +142979886 215463808 wid gbm sey 1950-02-28 05:24:40.764147000 1963-05-31 09:22:00.597388000 18592167978764317549185208459609159.845 +1480393448 215070586 eyk nid grm 1948-11-21 09:17:53.744504000 1987-11-29 12:21:52.464881000 -11258802150608921581827847436099018.192 +1561003958 1646857428 yto xsn sny 1934-04-19 17:29:37.626640000 1987-09-25 08:35:27.030011000 -6980284254174141786393031284061526.68 +-1288741414 -1216257492 jey cni fau 1998-11-24 23:56:22.798376000 1939-01-24 15:34:19.169731000 -9749118723824424218325427542036028.588 +48868754 -1216650714 alg jep sni 1961-01-28 15:28:36.923694000 1963-07-22 12:23:08.945284000 -18332440920639009382877570473258693.41 +129479264 215332734 upb toa oau 1946-06-26 23:40:20.805830000 1963-05-18 08:36:43.510414000 -14053923024204229587442754321221201.898 +1574635654 214677364 fal oje bvh 1938-01-03 23:59:14.671931000 1987-10-21 10:06:01.203959000 4450082497880607102006409016706010.623 +-1382852546 1646726354 mhc fql oju 1973-04-13 10:01:54.351075000 1939-03-17 18:35:27.517627000 17134408233624675903915179049968859.018 +-1302242036 -1216388566 qcw pbv bvq 1995-03-23 18:12:02.840058000 1939-01-11 14:49:02.082757000 145278197500801732889082237520837.314 +143110960 -1217043936 bmh kfq gbv 1950-03-13 06:09:57.851121000 1963-06-13 10:07:17.684362000 18633706987467567838461274635842533.549 +1480524522 215201660 ido rmh kvq 1948-12-04 10:03:10.831478000 1987-12-12 13:07:09.551855000 -11217263141905671292551781259865644.488 diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.sh b/tests/queries/0_stateless/02998_native_parquet_reader.sh new file mode 100755 index 00000000000..d6369c4921b --- /dev/null +++ b/tests/queries/0_stateless/02998_native_parquet_reader.sh @@ -0,0 +1,211 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +PAR_PATH="$CURDIR"/data_parquet/native_parquet_reader.parquet +# the content of parquet file can be generated by following codes +# < +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# +# namespace +# { +# +# using namespace DB; +# +# const UInt32 ROW_NUM = 2000; +# const UInt32 MIN_STRING_LEN = 3; +# const UInt32 MAX_STRING_LEN = 5; +# +# const UInt32 PLAIN_ENCODING_CARDINALITY = ROW_NUM * 2; +# const UInt32 MIX_ENCODING_CARDINALITY = 800; +# const UInt32 DICT_ENCODING_CARDINALITY = 20; +# +# UInt16 nextNum() +# { +# static UInt16 idx = 0; +# static UInt16 nums[] = {0, 21845, 43690}; +# static size_t nums_len = sizeof(nums) / sizeof(nums[0]); +# return nums[(idx++) % nums_len]++; +# } +# +# template +# void generateValues(MutableColumnPtr & col, size_t num) +# { +# using FieldType = typename NumericDataType::FieldType; +# +# const size_t next_num_bytes = sizeof(nextNum()); +# char bytewise_val[sizeof(FieldType)]; +# +# while (col->size() < num) +# { +# for (auto bytes = 0; bytes < sizeof(FieldType); bytes += next_num_bytes) +# { +# auto tmp = nextNum(); +# memcpy(bytewise_val + bytes, &tmp, std::min(next_num_bytes, sizeof(FieldType) - bytes)); +# } +# if (is_decimal) +# { +# // clean highest 3 bits, make sure the result doest not exceed the limits of the decimal type +# if (bytewise_val[sizeof(FieldType) - 1] > 0) +# bytewise_val[sizeof(FieldType) - 1] &= 0x0f; +# else +# bytewise_val[sizeof(FieldType) - 1] |= 0xf0; +# } +# FieldType val; +# memcpy(&val, &bytewise_val, sizeof(FieldType)); +# col->insert(val); +# } +# } +# +# template <> +# void generateValues(MutableColumnPtr & col, size_t num) +# { +# std::string str; +# while (col->size() < num) +# { +# auto len = MIN_STRING_LEN + nextNum() % (MAX_STRING_LEN - MIN_STRING_LEN); +# str.clear(); +# for (size_t i = 0; i < len; i++) +# { +# str.push_back('a' + nextNum() % ('z' - 'a')); +# } +# col->insert(str); +# } +# } +# +# template +# ColumnWithTypeAndName generateColumn( +# std::shared_ptr ch_type, +# size_t cardinality, +# const std::string & col_name, +# const std::set & null_indice) +# { +# DataTypePtr col_type = ch_type; +# if (!null_indice.empty()) +# { +# col_type = std::make_shared(ch_type); +# } +# +# auto values = ch_type->createColumn(); +# values->reserve(cardinality); +# generateValues(values, cardinality); +# +# auto col = col_type->createColumn(); +# col->reserve(ROW_NUM); +# for (size_t i = 0; i < ROW_NUM; i++) +# { +# if (!null_indice.empty() && null_indice.contains(i)) +# { +# col->insert(Null()); +# } +# else +# { +# col->insert(values->operator[](nextNum() % cardinality)); +# } +# } +# return {std::move(col), col_type, col_name}; +# } +# +# Block generateBlock() +# { +# ColumnsWithTypeAndName cols; +# +# // test Int32 type +# std::set null_indice{512, 1001, 211, 392, 553, 1725}; +# // Nullability is expressed by definition level, and encoded by bit packed with smallest group size of 8 +# // when null value appeared. Here we make a big bit packed group with more than 1000 values. 
+# for (size_t i = 0; i < 170; i++) +# { +# null_indice.emplace(622 + i * 6); +# } +# cols.emplace_back(generateColumn( +# std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_i32", null_indice)); +# null_indice = {917, 482, 283, 580, 1926, 1667, 1971}; +# cols.emplace_back(generateColumn( +# std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_i32", null_indice)); +# +# // test string type +# null_indice = {818, 928, 1958, 1141, 1553, 1407, 690, 1769}; +# cols.emplace_back(generateColumn( +# std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_str", null_indice)); +# null_indice = {1441, 1747, 216, 1209, 89, 52, 536, 625}; +# cols.emplace_back(generateColumn( +# std::make_shared(), MIX_ENCODING_CARDINALITY, "mix_encoding_str", null_indice)); +# null_indice = {1478, 1862, 894, 1314, 1844, 243, 869, 551}; +# cols.emplace_back(generateColumn( +# std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_str", null_indice)); +# +# // test DateTime64 type +# auto dt_type = std::make_shared(ParquetRecordReader::default_datetime64_scale); +# null_indice = {1078, 112, 1981, 795, 371, 1176, 1526, 11}; +# cols.emplace_back(generateColumn(dt_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_dt64", null_indice)); +# null_indice = {1734, 1153, 1893, 1205, 644, 1670, 1482, 1479}; +# cols.emplace_back(generateColumn(dt_type, DICT_ENCODING_CARDINALITY, "dict_encoding_dt64", null_indice)); +# +# // test Decimal128 type +# auto d128_type = std::make_shared(DecimalUtils::max_precision, 3); +# null_indice = {852, 1448, 1569, 896, 1866, 1655, 100, 418}; +# cols.emplace_back(generateColumn(d128_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_decimal128", null_indice)); +# +# return {cols}; +# } +# +# void dumpBlock(const Block & block) +# { +# WriteBufferFromFile output_buf("/tmp/ut-out.csv"); +# auto out = getContext().context->getOutputFormat("CSVWithNames", output_buf, block); +# out->write(block); +# out->finalize(); +# std::cerr << block.dumpStructure() << std::endl << std::endl; +# } +# +# } +# +# EndOfCodes +# +# How to generate the parquet file: +# 1. Use the above C++ code. +# Put the above code in src/Common/tests/gtest_main.cpp and add the following two lines in the main function: +# tryRegisterFormats(); +# dumpBlock(generateBlock()); +# 2. Generate /tmp/ut-out.csv. +# After compiling, run any test, such as "./src/unit_tests_dbms --gtest_filter=IColumn.dumpStructure", +# 3. 
Generate the parquet file with the following Spark SQL: +# create temporary view tv using csv options('path' '/tmp/ut-out.csv', 'header' 'true', 'nullValue' '\\N'); +# insert overwrite directory "/tmp/test-parquet" using Parquet +# options('parquet.dictionary.page.size' '500') +# select /*+ COALESCE(1) */ cast(plain_encoding_i32 as int), cast(dict_encoding_i32 as int), +# plain_encoding_str, mix_encoding_str, dict_encoding_str, +# cast(plain_encoding_dt64 as timestamp), cast(dict_encoding_dt64 as timestamp), +# cast(plain_encoding_decimal128 as decimal(38, 3)) +# from tv; +# + +CH_SCHEMA="\ + plain_encoding_i32 Nullable(Int32), \ + dict_encoding_i32 Nullable(Int32), \ + plain_encoding_str Nullable(String), \ + mix_encoding_str Nullable(String), \ + dict_encoding_str LowCardinality(Nullable(String)), \ + plain_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \ + dict_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \ + plain_encoding_decimal128 Nullable(Decimal(38, 3))" +QUERY="SELECT * from file('$PAR_PATH', 'Parquet', '$CH_SCHEMA')" + +# there may be more than one row group in parquet files; unstable results may be generated by multiple threads +$CLICKHOUSE_LOCAL --multiquery --max_threads 1 --max_parsing_threads 1 --input_format_parquet_use_native_reader true --query "$QUERY" diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sql b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sql index 514b4227b71..be4b16f1264 100644 --- a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sql +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sql @@ -1,4 +1,4 @@ --- Tags: no-replicated-database, no-fasttest +-- Tags: no-replicated-database, no-fasttest, no-parallel, no-random-settings, no-random-merge-tree-settings DROP TABLE IF EXISTS 03000_traverse_shadow_system_data_path_table; diff --git a/tests/queries/0_stateless/03004_force_null_for_omitted.reference b/tests/queries/0_stateless/03004_force_null_for_omitted.reference new file mode 100644 index 00000000000..a4c928aae8c --- /dev/null +++ b/tests/queries/0_stateless/03004_force_null_for_omitted.reference @@ -0,0 +1,44 @@ +0 0 +0 0 +2 0 +0 0 +4 0 +0 \N +0 \N +2 \N +0 \N +4 \N +0 \N +0 \N +2 \N +0 \N +4 \N +0 \N +0 \N +2 \N +0 \N +4 \N +0 \N +0 \N +2 \N +0 \N +4 \N +0 +0 \N +1 \N +1 \N +1 \N +1 \N +1 0 +1 \N +1 \N +1 2 +3 0 +1 0 +1 \N +1 \N +1 2 +3 0 +1 0 +1 \N +1 \N diff --git a/tests/queries/0_stateless/03004_force_null_for_omitted.sql b/tests/queries/0_stateless/03004_force_null_for_omitted.sql new file mode 100644 index 00000000000..43ba2568acb --- /dev/null +++ b/tests/queries/0_stateless/03004_force_null_for_omitted.sql @@ -0,0 +1,36 @@ +set allow_suspicious_low_cardinality_types = 1; +insert into function file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null Nullable(UInt32)') select number % 2 ?
NULL : number from numbers(5) settings engine_file_truncate_on_insert=1; +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo UInt32'); +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo UInt32') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo Nullable(UInt32)'); +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo Nullable(UInt32)') settings input_format_force_null_for_omitted_fields = 1; +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo LowCardinality(Nullable(UInt32))'); +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo LowCardinality(Nullable(UInt32))') settings input_format_force_null_for_omitted_fields = 1; + +select * from format(JSONEachRow, 'foo UInt32', '{}'); +select * from format(JSONEachRow, 'foo UInt32', '{}') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{}'); +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{\"foo\":1}'); +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{}') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{\"foo\":1}') settings input_format_force_null_for_omitted_fields = 1; +select * from format(JSONEachRow, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', '{\"foo\":1}'); +select * from format(JSONEachRow, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', '{\"foo\":1}') settings input_format_force_null_for_omitted_fields = 1; + +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1'); +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar Nullable(UInt32)', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo,bar\nUInt32,UInt32\n1,2\n3\n') settings input_format_csv_allow_variable_number_of_columns = 1; +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo,bar\nUInt32,UInt32\n1,2\n3\n') settings input_format_csv_allow_variable_number_of_columns = 1, input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } + +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1'); +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar Nullable(UInt32)', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * 
from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\tbar\nUInt32\tUInt32\n1\t2\n3\n') settings input_format_tsv_allow_variable_number_of_columns = 1; +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\tbar\nUInt32\tUInt32\n1\t2\n3\n') settings input_format_tsv_allow_variable_number_of_columns = 1, input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } + +select * from format(TSKV, 'foo UInt32, bar UInt32', 'foo=1\n'); +select * from format(TSKV, 'foo UInt32, bar UInt32', 'foo=1\n') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(TSKV, 'foo UInt32, bar Nullable(UInt32)', 'foo=1\n') settings input_format_force_null_for_omitted_fields = 1; +select * from format(TSKV, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', 'foo=1\n') settings input_format_force_null_for_omitted_fields = 1; diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.reference b/tests/queries/0_stateless/03008_local_plain_rewritable.reference new file mode 100644 index 00000000000..f69e11a3ca3 --- /dev/null +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.reference @@ -0,0 +1,24 @@ +10006 +0 0 0 +1 1 1 +1 2 0 +2 2 2 +2 2 2 +3 1 9 +3 3 3 +4 4 4 +4 7 7 +5 5 5 +1 +10006 +0 0 0 +1 1 1 +1 2 0 +2 2 2 +2 2 2 +3 1 9 +3 3 3 +4 4 4 +4 7 7 +5 5 5 +0 diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh new file mode 100755 index 00000000000..5fac964a219 --- /dev/null +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-s3-storage, no-replicated-database, no-shared-merge-tree +# Tag no-random-settings: enable after root causing flakiness + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "drop table if exists 03008_test_local_mt sync" + +${CLICKHOUSE_CLIENT} -nm --query " +create table 03008_test_local_mt (a Int32, b Int64, c Int64) +engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) +settings disk = disk( + name = 03008_local_plain_rewritable, + type = object_storage, + object_storage_type = local, + metadata_type = plain_rewritable, + path = '/var/lib/clickhouse/disks/local_plain_rewritable/') +" + +${CLICKHOUSE_CLIENT} -nm --query " +insert into 03008_test_local_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); +insert into 03008_test_local_mt (*) select number, number, number from numbers_mt(10000); +" + +${CLICKHOUSE_CLIENT} -nm --query " +select count(*) from 03008_test_local_mt; +select (*) from 03008_test_local_mt order by tuple(a, b) limit 10; +" + +${CLICKHOUSE_CLIENT} --query "optimize table 03008_test_local_mt final;" + +${CLICKHOUSE_CLIENT} -nm --query " +alter table 03008_test_local_mt modify setting disk = '03008_local_plain_rewritable', old_parts_lifetime = 3600; +select engine_full from system.tables WHERE database = currentDatabase() AND name = '03008_test_local_mt'; +" | grep -c "old_parts_lifetime = 3600" + +${CLICKHOUSE_CLIENT} -nm --query " +select count(*) from 03008_test_local_mt; +select (*) from 03008_test_local_mt order by tuple(a, b) limit 10; +" + +${CLICKHOUSE_CLIENT} -nm --query " +alter table 03008_test_local_mt update c = 0 where a % 2 = 1; +alter table 03008_test_local_mt add column d Int64 after c; +alter table 03008_test_local_mt drop column c; +" 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" + +${CLICKHOUSE_CLIENT} -nm --query " +truncate table 03008_test_local_mt; +select count(*) from 03008_test_local_mt; +" + +${CLICKHOUSE_CLIENT} --query "drop table 03008_test_local_mt sync" diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.sh b/tests/queries/0_stateless/03008_s3_plain_rewritable.sh new file mode 100755 index 00000000000..4d5989f6f12 --- /dev/null +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-shared-merge-tree +# Tag no-fasttest: requires S3 +# Tag no-shared-merge-tree: does not support replication + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "drop table if exists test_s3_mt" + +${CLICKHOUSE_CLIENT} -nm --query " +create table test_s3_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) +settings disk = disk( + name = 03008_s3_plain_rewritable, + type = s3_plain_rewritable, + endpoint = 'http://localhost:11111/test/03008_test_s3_mt/', + access_key_id = clickhouse, + secret_access_key = clickhouse); +" + +${CLICKHOUSE_CLIENT} -nm --query " +insert into test_s3_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); +insert into test_s3_mt (*) select number, number, number from numbers_mt(10000); +select count(*) from test_s3_mt; +select (*) from test_s3_mt order by tuple(a, b) limit 10; +" + +${CLICKHOUSE_CLIENT} --query "optimize table test_s3_mt final" + +${CLICKHOUSE_CLIENT} -m --query " +alter table test_s3_mt add projection test_s3_mt_projection (select * order by b)" 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" + +${CLICKHOUSE_CLIENT} -nm --query " +alter table test_s3_mt update c = 0 where a % 2 = 1; +alter table test_s3_mt add column d Int64 after c; +alter table test_s3_mt drop column c; +" 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" + +${CLICKHOUSE_CLIENT} -nm --query " +detach table test_s3_mt; +attach table test_s3_mt; +" + +${CLICKHOUSE_CLIENT} --query "drop table if exists test_s3_mt_dst" + +${CLICKHOUSE_CLIENT} -m --query " +create table test_s3_mt_dst (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) +settings disk = '03008_s3_plain_rewritable' +" + +${CLICKHOUSE_CLIENT} -m --query " +alter table test_s3_mt move partition 0 to table test_s3_mt_dst" 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" + +${CLICKHOUSE_CLIENT} --query "drop table test_s3_mt sync" diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql deleted file mode 100644 index af02ebbd874..00000000000 --- a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql +++ /dev/null @@ -1,35 +0,0 @@ --- Tags: no-fasttest --- Tag: no-fasttest -- requires S3 - -drop table if exists test_mt; -create table test_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) -settings disk = disk( - name = s3_plain_rewritable, - type = s3_plain_rewritable, - endpoint = 'http://localhost:11111/test/test_mt/', - access_key_id = clickhouse, - secret_access_key = clickhouse); - -insert into test_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); -insert into test_mt (*) select number, number, number from numbers_mt(10000); - -select count(*) from test_mt; -select (*) from test_mt order by tuple(a, b) limit 10; - -optimize table test_mt final; - -alter table test_mt add projection test_mt_projection ( - select * order by b); -- { serverError SUPPORT_IS_DISABLED } - -alter table test_mt update c = 0 where a % 2 = 1; -- { serverError SUPPORT_IS_DISABLED } -alter table test_mt add column d Int64 after c; -- { serverError SUPPORT_IS_DISABLED } -alter table test_mt drop column c; -- { serverError SUPPORT_IS_DISABLED } - -detach table test_mt; -attach table test_mt; - -drop table if exists test_mt_dst; - -create table test_mt_dst (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) -settings disk = 's3_plain_rewritable'; -alter table test_mt move partition 0 to table test_mt_dst; -- { serverError SUPPORT_IS_DISABLED } diff --git 
a/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.sql b/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.sql index fa4ba96277d..8dd96ae2efc 100644 --- a/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.sql +++ b/tests/queries/0_stateless/03009_storage_memory_circ_buffer_usage.sql @@ -57,7 +57,7 @@ INSERT INTO memory SELECT * FROM numbers(9000, 10000); SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); SELECT 'TESTING INVALID SETTINGS'; -CREATE TABLE faulty_memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100; -- { serverError 452 } -CREATE TABLE faulty_memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 100; -- { serverError 452 } +CREATE TABLE faulty_memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100; -- { serverError SETTING_CONSTRAINT_VIOLATION } +CREATE TABLE faulty_memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 100; -- { serverError SETTING_CONSTRAINT_VIOLATION } DROP TABLE memory; \ No newline at end of file diff --git a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql index f1727cb9e5c..fee42d1abc6 100644 --- a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql +++ b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql @@ -1,6 +1,6 @@ set allow_suspicious_primary_key = 0; -DROP TABLE IF EXISTS data; +drop table if exists data; create table data (key Int, value AggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } @@ -12,7 +12,22 @@ create table data (key Int, value AggregateFunction(sum, UInt64)) engine=Aggrega create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } set allow_suspicious_primary_key = 1; - create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -DROP TABLE data; +-- ATTACH should work regardless allow_suspicious_primary_key +set allow_suspicious_primary_key = 0; +detach table data; +attach table data; +drop table data; + +-- ALTER AggregatingMergeTree +create table data (key Int) engine=AggregatingMergeTree() order by (key); +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data; + +-- ALTER ReplicatedAggregatingMergeTree +create table data_rep (key Int) engine=ReplicatedAggregatingMergeTree('/tables/{database}', 'r1') order by (key); +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data_rep; diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference 
b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference index 17a17484a0c..02ea01eb2e6 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference @@ -66,3 +66,61 @@ a a a a a a +0 0 +0 \N +1 2 +1 \N +2 4 +2 \N +\N 0 +\N 2 +\N 4 +\N \N +0 0 nan +2 4 nan +1 2 nan +2 \N nan +0 \N nan +1 \N nan +\N 2 nan +\N 0 nan +\N 4 nan +\N \N nan +[] +['.'] +['.','.'] +['.','.','.'] +['.','.','.','.'] +['.','.','.','.','.'] +['.','.','.','.','.','.'] +['.','.','.','.','.','.','.'] +['.','.','.','.','.','.','.','.'] +['.','.','.','.','.','.','.','.','.'] +[] +[] +[] +[] +[] +[] +[] +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +10 diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 68710137542..b8c173520a9 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -21,3 +21,39 @@ SELECT tuple(number + 1) AS x FROM numbers(10) GROUP BY number + 1, toString(x) SELECT tuple(tuple(number)) AS x FROM numbers(10) WHERE toString(toUUID(tuple(number), NULL), x) GROUP BY number, (toString(x), number) WITH CUBE SETTINGS group_by_use_nulls = 1 FORMAT Null; SELECT materialize('a'), 'a' AS key GROUP BY key WITH CUBE WITH TOTALS SETTINGS group_by_use_nulls = 1; + +EXPLAIN QUERY TREE +SELECT a, b +FROM numbers(3) +GROUP BY number as a, (number + number) as b WITH CUBE +ORDER BY a, b format Null; + +SELECT a, b +FROM numbers(3) +GROUP BY number as a, (number + number) as b WITH CUBE +ORDER BY a, b; + +SELECT + a, + b, + cramersVBiasCorrected(a, b) +FROM numbers(3) +GROUP BY + number AS a, + number + number AS b + WITH CUBE +SETTINGS group_by_use_nulls = 1; + +SELECT arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_use_nulls=1; + +SELECT count('Lambda as function parameter') AS c FROM (SELECT ignore(ignore('Lambda as function parameter', 28, 28, 28, 28, 28, 28), 28), materialize('Lambda as function parameter'), 28, 28, 'world', 5 FROM system.numbers WHERE ignore(materialize('Lambda as function parameter'), materialize(toLowCardinality(28)), 28, 28, 28, 28, toUInt128(28)) LIMIT 2) GROUP BY GROUPING SETS ((toLowCardinality(0)), (toLowCardinality(toNullable(28))), (1)) HAVING nullIf(c, 10) < 50 ORDER BY c ASC NULLS FIRST settings group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } + +SELECT arraySplit(x -> 0, []) WHERE materialize(1) GROUP BY (0, ignore('a')) WITH ROLLUP SETTINGS group_by_use_nulls = 1; + +SELECT arraySplit(x -> toUInt8(number), []) from numbers(1) GROUP BY toUInt8(number) WITH ROLLUP SETTINGS group_by_use_nulls = 1; + +SELECT arraySplit(number -> toUInt8(number), []) from numbers(1) GROUP BY toUInt8(number) WITH ROLLUP SETTINGS group_by_use_nulls = 1; + +SELECT count(arraySplit(number -> toUInt8(number), [arraySplit(x -> toUInt8(number), [])])) FROM numbers(10) GROUP BY number, [number] WITH ROLLUP settings group_by_use_nulls=1; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +SELECT count(arraySplit(x -> toUInt8(number), [])) FROM numbers(10) GROUP BY number, [number] WITH ROLLUP settings group_by_use_nulls=1; \ No newline at end of file diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect 
b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect new file mode 100755 index 00000000000..de15a199132 --- /dev/null +++ b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect @@ -0,0 +1,49 @@ +#!/usr/bin/expect -f + +set basedir [file dirname $argv0] +set basename [file tail $argv0] +if {[info exists env(CLICKHOUSE_TMP)]} { + set CLICKHOUSE_TMP $env(CLICKHOUSE_TMP) +} else { + set CLICKHOUSE_TMP "." +} +exp_internal -f $CLICKHOUSE_TMP/$basename.debuglog 0 + +log_user 0 +set timeout 60 +match_max 100000 +set stty_init "rows 25 cols 120" + +expect_after { + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } +} + +spawn clickhouse-local +expect ":) " + +# Trivial SELECT with LIMIT from system.zeros shows progress bar. +send "SELECT * FROM system.zeros LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" +expect "Progress: " +expect "█" +send "\3" +expect "Query was cancelled." +expect ":) " + +send "SELECT * FROM system.zeros_mt LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" +expect "Progress: " +expect "█" +send "\3" +expect "Query was cancelled." +expect ":) " + +# As well as from generateRandom +send "SELECT * FROM generateRandom() LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" +expect "Progress: " +expect "█" +send "\3" +expect "Query was cancelled." +expect ":) " + +send "exit\r" +expect eof diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03024_total_rows_approx_is_set_for_system_zeros_and_generate_random.reference b/tests/queries/0_stateless/03024_total_rows_approx_is_set_for_system_zeros_and_generate_random.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03024_total_rows_approx_is_set_for_system_zeros_and_generate_random.sql b/tests/queries/0_stateless/03024_total_rows_approx_is_set_for_system_zeros_and_generate_random.sql new file mode 100644 index 00000000000..0db09ead2cd --- /dev/null +++ b/tests/queries/0_stateless/03024_total_rows_approx_is_set_for_system_zeros_and_generate_random.sql @@ -0,0 +1,9 @@ +SET max_rows_to_read = 1e11; + +SELECT * FROM system.numbers LIMIT 1e12 FORMAT Null; -- { serverError TOO_MANY_ROWS } +SELECT * FROM system.numbers_mt LIMIT 1e12 FORMAT Null; -- { serverError TOO_MANY_ROWS } + +SELECT * FROM system.zeros LIMIT 1e12 FORMAT Null; -- { serverError TOO_MANY_ROWS } +SELECT * FROM system.zeros_mt LIMIT 1e12 FORMAT Null; -- { serverError TOO_MANY_ROWS } + +SELECT * FROM generateRandom() LIMIT 1e12 FORMAT Null; -- { serverError TOO_MANY_ROWS } diff --git a/tests/queries/0_stateless/03032_multi_search_const_low_cardinality.reference b/tests/queries/0_stateless/03032_multi_search_const_low_cardinality.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/03032_multi_search_const_low_cardinality.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03032_multi_search_const_low_cardinality.sql b/tests/queries/0_stateless/03032_multi_search_const_low_cardinality.sql 
new file mode 100644 index 00000000000..bc5e5cff15c --- /dev/null +++ b/tests/queries/0_stateless/03032_multi_search_const_low_cardinality.sql @@ -0,0 +1 @@ +SELECT multiSearchFirstIndex(toLowCardinality(''), [toLowCardinality('')]) diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql index 1507107c37f..2815e8e04d0 100644 --- a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql @@ -67,8 +67,8 @@ SELECT total_rows FROM system.tables WHERE name = 'memory' and database = curren SELECT 'TESTING INVALID SETTINGS'; DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; -ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100; -- { serverError 452 } -ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 100; -- { serverError 452 } +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100; -- { serverError SETTING_CONSTRAINT_VIOLATION } +ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 100; -- { serverError SETTING_CONSTRAINT_VIOLATION } ALTER TABLE memory MODIFY SETTING max_rows_to_keep = 1000; ALTER TABLE memory MODIFY SETTING max_bytes_to_keep = 1000; diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference new file mode 100644 index 00000000000..d965245266c --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference @@ -0,0 +1,55 @@ +JSON +{"d":"42","dynamicType(d)":"Int64"} +{"d":42.42,"dynamicType(d)":"Float64"} +{"d":"str","dynamicType(d)":"String"} +{"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"} +{"d":"2020-01-01","dynamicType(d)":"Date"} +{"d":"2020-01-01 10:00:00.000000000","dynamicType(d)":"DateTime64(9)"} +{"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"} +{"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"} +{"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"} +{"d":["1","str",["1","2","3"]],"dynamicType(d)":"Tuple(Int64, String, Array(Int64))"} +{"d":null,"dynamicType(d)":"None"} +{"d":true,"dynamicType(d)":"Bool"} +{"d":"42","dynamicType(d)":"Int64"} +{"d":"42.42","dynamicType(d)":"String"} +{"d":"str","dynamicType(d)":"String"} +{"d":null,"dynamicType(d)":"None"} +{"d":"1","dynamicType(d)":"Int64"} +CSV +42,"Int64" +42.42,"Float64" +"str","String" +"[1,2,3]","Array(Int64)" +"2020-01-01","Date" +"2020-01-01 10:00:00.000000000","DateTime64(9)" +"[1, 'str', [1, 2, 3]]","String" +\N,"None" +true,"Bool" +TSV +42 Int64 +42.42 Float64 +str String +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +[1, \'str\', [1, 2, 3]] String +\N None +true Bool +Values +(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00.000000000','DateTime64(9)'),(NULL,'None'),(true,'Bool') +Cast using parsing +42 Int64 +42.42 Float64 +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +\N None +true Bool +42 Int64 +42.42 Float64 +[1, 2, 3] String +2020-01-01 String +2020-01-01 10:00:00 String +\N None +true String diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.sql b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql new file mode 100644 index 00000000000..d12d110fe28 --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql @@ -0,0 +1,74 @@ 
+set allow_experimental_dynamic_type = 1; + +select 'JSON'; +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : [1, 2, 3]} +{"d" : "2020-01-01"} +{"d" : "2020-01-01 10:00:00"} +{"d" : {"a" : 42, "b" : "str"}} +{"d" : {"a" : 43}} +{"d" : {"a" : 44, "c" : [1, 2, 3]}} +{"d" : [1, "str", [1, 2, 3]]} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic(max_types=2)', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select 'CSV'; +select d, dynamicType(d) from format(CSV, 'd Dynamic', +$$42 +42.42 +"str" +"[1, 2, 3]" +"2020-01-01" +"2020-01-01 10:00:00" +"[1, 'str', [1, 2, 3]]" +\N +true +$$) format CSV; + +select 'TSV'; +select d, dynamicType(d) from format(TSV, 'd Dynamic', +$$42 +42.42 +str +[1, 2, 3] +2020-01-01 +2020-01-01 10:00:00 +[1, 'str', [1, 2, 3]] +\N +true +$$) format TSV; + +select 'Values'; +select d, dynamicType(d) from format(Values, 'd Dynamic', $$ +(42) +(42.42) +('str') +([1, 2, 3]) +('2020-01-01') +('2020-01-01 10:00:00') +(NULL) +(true) +$$) format Values; +select ''; + +select 'Cast using parsing'; +drop table if exists test; +create table test (s String) engine=Memory; +insert into test values ('42'), ('42.42'), ('[1, 2, 3]'), ('2020-01-01'), ('2020-01-01 10:00:00'), ('NULL'), ('true'); +set cast_string_to_dynamic_use_inference=1; +select s::Dynamic as d, dynamicType(d) from test; +select s::Dynamic(max_types=3) as d, dynamicType(d) from test; +drop table test; + diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference new file mode 100644 index 00000000000..a30b755709b --- /dev/null +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference @@ -0,0 +1,2 @@ +Disabled 11338881281426660955 14765404159170880511 +Enabled 11338881281426660955 14765404159170880511 diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql new file mode 100644 index 00000000000..25a30a365a5 --- /dev/null +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql @@ -0,0 +1,23 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings + +DROP TABLE IF EXISTS account_test; + +CREATE TABLE account_test +( + `id` UInt64, + `row_ver` UInt64, +) +ENGINE = ReplacingMergeTree(row_ver) +ORDER BY id +SETTINGS index_granularity = 16, index_granularity_bytes = 0, + min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, + min_rows_for_compact_part = 0, min_bytes_for_compact_part = 0; + +SYSTEM STOP MERGES account_test; + +INSERT INTO account_test VALUES (11338881281426660955,717769962224129342),(12484100559155738267,7950971667203174918),(7603729260199571867,3255798127676911942),(7023543111808724827,911615979861855126),(10293135086416484571,3264379259750736572),(15561193439904316763,8419819469587131454),(17632407413882870235,7252071832370181502),(17009726455991851227,7525297506591593939),(12392078953873778779,8473049173389293961),(15283366022689446555,11692491360262171467),(9087459014730986523,2783662960221838603),(293823584550906267,4847630088179732782),(15693186194430465755,8163804880526285623),(7353080168325584795,17315892478487497859),(5980311238303466523,6943353798059390089),(14242621660019578011,8684624667957352769),(8241843507567433563,15731952080102886438); +INSERT INTO account_test VALUES (11338881281426660955, 
14765404159170880511); + +SELECT 'Disabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; +SELECT 'Enabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; + diff --git a/tests/queries/0_stateless/03033_set_index_in.reference b/tests/queries/0_stateless/03033_set_index_in.reference new file mode 100644 index 00000000000..3800acc0458 --- /dev/null +++ b/tests/queries/0_stateless/03033_set_index_in.reference @@ -0,0 +1,3 @@ +32768 +49152 +32768 diff --git a/tests/queries/0_stateless/03033_set_index_in.sql b/tests/queries/0_stateless/03033_set_index_in.sql new file mode 100644 index 00000000000..ad42a576444 --- /dev/null +++ b/tests/queries/0_stateless/03033_set_index_in.sql @@ -0,0 +1,9 @@ +create table a (k UInt64, v UInt64, index i (v) type set(100) granularity 2) engine MergeTree order by k settings index_granularity=8192, index_granularity_bytes=1000000000, min_index_granularity_bytes=0; +insert into a select number, intDiv(number, 4096) from numbers(1000000); +select sum(1+ignore(*)) from a where indexHint(v in (20, 40)); +select sum(1+ignore(*)) from a where indexHint(v in (select 20 union all select 40 union all select 60)); + +SELECT 1 FROM a PREWHERE v IN (SELECT 1) WHERE v IN (SELECT 2); + +select 1 from a where indexHint(indexHint(materialize(0))); +select sum(1+ignore(*)) from a where indexHint(indexHint(v in (20, 40))); \ No newline at end of file diff --git a/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference new file mode 100644 index 00000000000..9dc0605fd5a --- /dev/null +++ b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.reference @@ -0,0 +1,8 @@ +Map(Nothing, ...) is non-comparable --> not usable as primary key +But Map(Nothing, ...) can be a non-primary-key, it is quite useless though ... +Map(Float32, ...) and Map(LC(String)) are okay as primary key +{1:'a'} {'b':'b'} +{2:'aa'} {'bb':'bb'} +Map(Float32, ...) and Map(LC(String)) as non-primary-key +{1:'a'} {'b':'b'} +{3:'aaa'} {'bb':'bb'} diff --git a/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql new file mode 100644 index 00000000000..a3cd59df1cd --- /dev/null +++ b/tests/queries/0_stateless/03034_ddls_and_merges_with_unusual_maps.sql @@ -0,0 +1,33 @@ +-- Tests maps with "unusual" key types (Float32, Nothing, LowCardinality(String)) + +SET mutations_sync = 2; + +DROP TABLE IF EXISTS tab; + +SELECT 'Map(Nothing, ...) is non-comparable --> not usable as primary key'; +CREATE TABLE tab (m1 Map(Nothing, String)) ENGINE = MergeTree ORDER BY m1; -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } + +SELECT 'But Map(Nothing, ...) can be a non-primary-key, it is quite useless though ...'; +CREATE TABLE tab (m3 Map(Nothing, String)) ENGINE = MergeTree ORDER BY tuple(); +-- INSERT INTO tab VALUES (map('', 'd')); -- { serverError NOT_IMPLEMENTED } -- The client can't serialize the data and fails. The query + -- doesn't reach the server and we can't check via 'serverError' :-/ +DROP TABLE tab; + +SELECT 'Map(Float32, ...) 
and Map(LC(String)) are okay as primary key'; +CREATE TABLE tab (m1 Map(Float32, String), m2 Map(LowCardinality(String), String)) ENGINE = MergeTree ORDER BY (m1, m2); +INSERT INTO tab VALUES (map(1.0, 'a'), map('b', 'b')); +INSERT INTO tab VALUES (map(2.0, 'aa'), map('bb', 'bb')); + +-- Test merge +OPTIMIZE TABLE tab FINAL; +SELECT * FROM tab ORDER BY m1, m2; + +DROP TABLE tab; + +SELECT 'Map(Float32, ...) and Map(LC(String)) as non-primary-key'; +CREATE TABLE tab (m1 Map(Float32, String), m2 Map(LowCardinality(String), String)) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO tab VALUES (map(1.0, 'a'), map('b', 'b')), (map(2.0, 'aa'), map('bb', 'bb')); +ALTER TABLE tab UPDATE m1 = map(3.0, 'aaa') WHERE m1 = map(2.0, 'aa'); +SELECT * FROM tab ORDER BY m1, m2; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.reference b/tests/queries/0_stateless/03034_dynamic_conversions.reference new file mode 100644 index 00000000000..45f94f7ecc4 --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.reference @@ -0,0 +1,88 @@ +0 UInt64 +1 UInt64 +2 UInt64 +0 String +1 String +2 String +0 +1 +2 +0 +1 +2 +1970-01-01 +1970-01-02 +1970-01-03 +0 UInt64 +1 UInt64 +2 UInt64 +0 UInt64 +\N None +2 UInt64 +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 UInt64 +str_5 String +0 String +str_1 String +[0,1] String +\N None +4 String +str_5 String +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 UInt64 +str_5 String +0 +1 +2 +0 +1 +2 +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +1970-01-02 Date +[0,1] String +\N None +4 UInt64 +1970-01-06 Date +0 +42 +42.42 +1 +0 +\N +42 +42.42 +1 +0 + +42 +42.42 +true +e10 +\N +42 +42.42 +true +e10 +\N +42 +\N +1 +\N diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.sql b/tests/queries/0_stateless/03034_dynamic_conversions.sql new file mode 100644 index 00000000000..ed75fbf2377 --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.sql @@ -0,0 +1,34 @@ +set allow_experimental_dynamic_type=1; +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select number::Dynamic as d, dynamicType(d) from numbers(3); +select number::Dynamic(max_types=1) as d, dynamicType(d) from numbers(3); +select number::Dynamic::UInt64 as v from numbers(3); +select number::Dynamic::String as v from numbers(3); +select number::Dynamic::Date as v from numbers(3); +select number::Dynamic::Array(UInt64) as v from numbers(3); -- {serverError TYPE_MISMATCH} +select number::Dynamic::Variant(UInt64, String) as v, variantType(v) from numbers(3); +select (number % 2 ? 
NULL : number)::Dynamic as d, dynamicType(d) from numbers(3); + +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=1) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + +select number::Dynamic(max_types=2)::Dynamic(max_types=3) as d from numbers(3); +select number::Dynamic(max_types=2)::Dynamic(max_types=1) as d from numbers(3); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, toDate(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=4)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + + +create table test (d Dynamic) engine = Memory; +insert into test values (NULL), (42), ('42.42'), (true), ('e10'); +select d::Float64 from test; +select d::Nullable(Float64) from test; +select d::String from test; +select d::Nullable(String) from test; +select d::UInt64 from test; -- {serverError CANNOT_PARSE_TEXT} +select d::Nullable(UInt64) from test; +select d::Date from test; -- {serverError CANNOT_PARSE_DATE} + diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.reference b/tests/queries/0_stateless/03035_dynamic_sorting.reference new file mode 100644 index 00000000000..9b8df11c7a9 --- /dev/null +++ b/tests/queries/0_stateless/03035_dynamic_sorting.reference @@ -0,0 +1,299 @@ +order by d1 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +order by d1 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d2 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +order by d2 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d1, d2 nulls first +[1,2,3] \N Array(Int64) None +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) 
String +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 \N Int64 None +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +43 42 Int64 Int64 +abc \N String None +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abd abc String String +\N \N None None +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +order by d1, d2 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) String +[1,2,3] \N Array(Int64) None +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +42 \N Int64 None +43 42 Int64 Int64 +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abc \N String None +abd abc String String +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +\N \N None None +order by d2, d1 nulls first +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +\N 42 None Int64 +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +42 43 Int64 Int64 +\N abc None String +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +abc abd String String +\N \N None None +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +order by d2, d1 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +\N 42 None Int64 +42 43 Int64 Int64 +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +\N abc None String +abc abd String String +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +\N \N None None +d1 = d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) +[1,2,3] 42 0 Array(Int64) Int64 +[1,2,3] abc 0 Array(Int64) String +[1,2,3] \N 0 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 0 Int64 Int64 +42 abc 0 Int64 String +42 \N 0 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 0 String String +abc \N 0 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 1 None None +d1 < d2 +[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 0 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 0 String String +abc abd 1 String String +abc \N 1 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 0 None None +d1 <= d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] 
[1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 1 String String +abc \N 1 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 1 None None +d1 > d2 +[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) +[1,2,3] 42 0 Array(Int64) Int64 +[1,2,3] abc 0 Array(Int64) String +[1,2,3] \N 0 Array(Int64) None +[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) +42 [1,2,3] 1 Int64 Array(Int64) +42 42 0 Int64 Int64 +42 43 0 Int64 Int64 +42 abc 0 Int64 String +42 \N 0 Int64 None +43 42 1 Int64 Int64 +abc [1,2,3] 1 String Array(Int64) +abc 42 1 String Int64 +abc abc 0 String String +abc abd 0 String String +abc \N 0 String None +abd abc 1 String String +\N [1,2,3] 1 None Array(Int64) +\N 42 1 None Int64 +\N abc 1 None String +\N \N 0 None None +d1 >= d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) +42 [1,2,3] 1 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 1 Int64 Int64 +abc [1,2,3] 1 String Array(Int64) +abc 42 1 String Int64 +abc abc 1 String String +abc abd 1 String String +abc \N 1 String None +abd abc 1 String String +\N [1,2,3] 1 None Array(Int64) +\N 42 1 None Int64 +\N abc 1 None String +\N \N 1 None None diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.sql b/tests/queries/0_stateless/03035_dynamic_sorting.sql new file mode 100644 index 00000000000..0487fafc955 --- /dev/null +++ b/tests/queries/0_stateless/03035_dynamic_sorting.sql @@ -0,0 +1,80 @@ +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (d1 Dynamic, d2 Dynamic) engine=Memory; + +insert into test values (42, 42); +insert into test values (42, 43); +insert into test values (43, 42); + +insert into test values ('abc', 'abc'); +insert into test values ('abc', 'abd'); +insert into test values ('abd', 'abc'); + +insert into test values ([1,2,3], [1,2,3]); +insert into test values ([1,2,3], [1,2,4]); +insert into test values ([1,2,4], [1,2,3]); + +insert into test values (NULL, NULL); + +insert into test values (42, 'abc'); +insert into test values ('abc', 42); + +insert into test values (42, [1,2,3]); +insert into test values ([1,2,3], 42); + +insert into test values (42, NULL); +insert into test values (NULL, 42); + +insert into test values ('abc', [1,2,3]); +insert into test values ([1,2,3], 'abc'); + +insert into test values ('abc', NULL); +insert into test values (NULL, 'abc'); + +insert into test values ([1,2,3], NULL); +insert into test values (NULL, [1,2,3]); + + +select 'order by d1 nulls first'; +select d1, dynamicType(d1) from test order by d1 nulls first; + +select 'order by d1 nulls last'; +select d1, dynamicType(d1) from test order by d1 nulls last; + +select 'order by d2 nulls first'; +select d2, dynamicType(d2) from test order by d2 nulls first; + +select 'order by d2 nulls last'; +select d2, dynamicType(d2) from test order by d2 nulls last; + + +select 'order by d1, d2 
nulls first'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls first; + +select 'order by d1, d2 nulls last'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls last; + +select 'order by d2, d1 nulls first'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls first; + +select 'order by d2, d1 nulls last'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls last; + +select 'd1 = d2'; +select d1, d2, d1 = d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 < d2'; +select d1, d2, d1 < d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 <= d2'; +select d1, d2, d1 <= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 > d2'; +select d1, d2, d1 > d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 >= d2'; +select d1, d2, d2 >= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +drop table test; + diff --git a/tests/queries/0_stateless/03036_clamp.reference b/tests/queries/0_stateless/03036_clamp.reference new file mode 100644 index 00000000000..b866caf2261 --- /dev/null +++ b/tests/queries/0_stateless/03036_clamp.reference @@ -0,0 +1,14 @@ +10 +20 +15 +b +0 +['hello'] +-1 +234 +\N +\N +5 +0 +1 +2 diff --git a/tests/queries/0_stateless/03036_clamp.sql b/tests/queries/0_stateless/03036_clamp.sql new file mode 100644 index 00000000000..9973265c13b --- /dev/null +++ b/tests/queries/0_stateless/03036_clamp.sql @@ -0,0 +1,15 @@ +SELECT clamp(1, 10, 20); +SELECT clamp(30, 10, 20); +SELECT clamp(15, 10, 20); +SELECT clamp('a', 'b', 'c'); +SELECT clamp(today(), yesterday() - 10, yesterday() + 10) - today(); +SELECT clamp([], ['hello'], ['world']); +SELECT clamp(-1., -1000., 18446744073709551615.); +SELECT clamp(toNullable(123), 234, 456); +select clamp(1, null, 5); +select clamp(1, 6, null); +select clamp(1, 5, nan); +select clamp(toInt64(number), toInt64(number-1), toInt64(number+1)) from numbers(3); +select clamp(number, number-1, number+1) from numbers(3); -- { serverError NO_COMMON_TYPE } +select clamp(1, 3, 2); -- { serverError BAD_ARGUMENTS } +select clamp(1, data[1], data[2])from (select arrayJoin([[1, 2], [2,3], [3,2], [4, 4]]) as data); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference new file mode 100644 index 00000000000..36984bc8b9b --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh new file mode 100755 index 00000000000..65517061b99 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# 
reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.Int8, d.Date, d.\`Array(String)\` from test format Null" + $CH_CLIENT -q "select d, d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64, d.\`Array(Variant(String, UInt64))\`.String from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, 
UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference index 8820bb7cb9f..985f8192f26 100644 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference @@ -5,7 +5,7 @@ Arrow a UInt64 a_nullable Nullable(UInt64) Parquet -b Array(Nullable(UInt64)) +b Array(UInt64) b_nullable Array(Nullable(UInt64)) Arrow b Array(Nullable(UInt64)) @@ -21,13 +21,13 @@ d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n Arrow d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) Parquet -e Map(UInt64, Nullable(String)) +e Map(UInt64, String) e_nullable Map(UInt64, Nullable(String)) Arrow e Map(UInt64, Nullable(String)) e_nullable Map(UInt64, Nullable(String)) Parquet -f Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) Arrow f Map(UInt64, Map(UInt64, Nullable(String))) diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference new file mode 100644 index 00000000000..36ced212a1b --- /dev/null +++ b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -0,0 +1,52 @@ +1 Str1 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 example3.csv test/03036_archive2.zip::example3.csv +3 Str3 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 example4.csv 
test/03036_archive2.tar::example4.csv +8 Str8 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 example4.csv test/03036_archive2.tar::example4.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql new file mode 100644 index 00000000000..00d7cc25e1a --- /dev/null +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -0,0 +1,22 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip 
:: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); +select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); +SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); +CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); +SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) diff --git a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.reference b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.reference new file mode 100644 index 00000000000..34f4287f360 --- /dev/null +++ b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.reference @@ -0,0 +1,11 @@ +1 Str1 +2 Str2 +3 Str3 +4 Str4 +DEFAULT 03036_archive1.zip::example1.csv id Nullable(Int64), data Nullable(String) +21 Str21 +22 Str22 +23 Str23 +24 Str24 +UNION 03036_json_archive.zip::example11.jsonl id Nullable(Int64), data Nullable(String) +UNION 03036_json_archive.zip::example12.jsonl id Nullable(Int64), data Nullable(String) diff --git a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql new file mode 100644 index 00000000000..61b3e1d6f43 --- /dev/null +++ b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql @@ -0,0 +1,9 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT * FROM s3(s3_conn, filename='03036_archive1.zip :: example{1,2}.csv') ORDER BY tuple(*); +SELECT schema_inference_mode, splitByChar('/', source)[-1] as file, schema FROM system.schema_inference_cache WHERE file = '03036_archive1.zip::example1.csv' ORDER BY file; + +SET schema_inference_mode = 'union'; +SELECT * FROM s3(s3_conn, filename='03036_json_archive.zip :: example{11,12}.jsonl') ORDER BY tuple(*); +SELECT schema_inference_mode, splitByChar('/', source)[-1] as file, schema FROM system.schema_inference_cache WHERE startsWith(file, '03036_json_archive.zip') ORDER BY file; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 
Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh new file mode 100755 index 00000000000..7c1ac41cfdc --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" + $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" + $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + diff --git 
a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh new file mode 100755 index 00000000000..927ceac72b5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" + $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" + $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.reference b/tests/queries/0_stateless/03037_dynamic_merges_2.reference new file mode 100644 index 00000000000..420b8185b16 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree compact + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh new file mode 100755 
index 00000000000..40adbdd4262 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(1000000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" + + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_s3_write_to_globbed_partitioned_path.reference b/tests/queries/0_stateless/03037_s3_write_to_globbed_partitioned_path.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03037_s3_write_to_globbed_partitioned_path.sql b/tests/queries/0_stateless/03037_s3_write_to_globbed_partitioned_path.sql new file mode 100644 index 00000000000..1de89a593b0 --- /dev/null +++ b/tests/queries/0_stateless/03037_s3_write_to_globbed_partitioned_path.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest + +insert into function s3('http://localhost:11111/test/data_*_{_partition_id}.csv') partition by number % 3 select * from numbers(10); -- {serverError DATABASE_ACCESS_DENIED} + diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference new file mode 100644 index 00000000000..65034647775 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference @@ -0,0 +1,92 @@ +MergeTree compact + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a 
Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree compact + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh new file mode 100755 index 00000000000..b82ddb3813e --- /dev/null +++ 
b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ 
b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh new file mode 100755 index 00000000000..b8760ec0e1d --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128 --optimize_aggregation_in_order 0" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference 
b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference new file mode 100644 index 00000000000..fc293cc2ec8 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh new file mode 100755 index 00000000000..881c9ec64cc --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference new file mode 100644 index 00000000000..132b9df6b26 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String diff --git 
a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh new file mode 100755 index 00000000000..fc9039ac98c --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh new file mode 100755 index 00000000000..f9da70e95ca --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + 
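+# Descriptive note (editorial comment, not part of the original patch): this script checks that Dynamic column values and the summed column survive SummingMergeTree merges, running the same scenario for compact and wide parts with both horizontal and vertical merge algorithms.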
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference new file mode 100644 index 00000000000..cabb0fdefab --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh new file mode 100755 index 00000000000..ca313307a6d --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference new file mode 100644 index 00000000000..ca98ec0963c --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -0,0 +1,526 @@ +Memory +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +4 UInt64 +7 String +8 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N 
\N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +5 UInt64 +8 String +9 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +5 UInt64 +8 String +9 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +5 UInt64 +8 String +12 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree compact +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 
\N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree wide +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 
1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh 
b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh new file mode 100755 index 00000000000..7a73be20a4d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column 1" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 1" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 1" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 2" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 2" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 3" + $CH_CLIENT -q "alter table test modify column y Dynamic settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test 
group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 3" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference new file mode 100644 index 00000000000..18a181464e9 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -0,0 +1,182 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] 
+11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh new file mode 100755 index 00000000000..6491e64372f --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter rename column 1" + $CH_CLIENT -q "alter table test rename column d to d1 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert nested dynamic" + $CH_CLIENT -q "insert into test select number, number, [number % 2 ? number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3)" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a, d1.\`Array(Dynamic)\`.UInt64, d1.\`Array(Dynamic)\`.String, d1.\`Array(Dynamic)\`.Date from test order by x" + + echo "alter rename column 2" + $CH_CLIENT -q "alter table test rename column d1 to d2 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2)" + $CH_CLIENT -q "select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.\`Tuple(a UInt64)\`.a, d2.\`Array(Dynamic)\`.UInt64, d2.\`Array(Dynamic)\`.String, d2.\`Array(Dynamic)\`.Date, from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference new file mode 100644 index 00000000000..b1ea186a917 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -0,0 +1,56 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 
None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh new file mode 100755 index 00000000000..3d802485be3 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "check table" + $CH_CLIENT -q "check table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03095_group_by_server_constants_bug.reference b/tests/queries/0_stateless/03095_group_by_server_constants_bug.reference new file mode 100644 index 00000000000..80ab3c879bb --- /dev/null +++ b/tests/queries/0_stateless/03095_group_by_server_constants_bug.reference @@ -0,0 +1 @@ +r1 2 diff --git 
a/tests/queries/0_stateless/03095_group_by_server_constants_bug.sql b/tests/queries/0_stateless/03095_group_by_server_constants_bug.sql new file mode 100644 index 00000000000..9f9fda1ef62 --- /dev/null +++ b/tests/queries/0_stateless/03095_group_by_server_constants_bug.sql @@ -0,0 +1,5 @@ +SELECT serverUUID() AS s, count() FROM remote('127.0.0.{1,2}', system.one) GROUP BY s format Null; + +select getMacro('replica') as s, count() from remote('127.0.0.{1,2}', system.one) group by s; + +select uptime() as s, count() FROM remote('127.0.0.{1,2}', system.one) group by s format Null; diff --git a/tests/queries/0_stateless/03095_window_functions_qualify.sql b/tests/queries/0_stateless/03095_window_functions_qualify.sql index 35e203a2ffc..adedff2e2cf 100644 --- a/tests/queries/0_stateless/03095_window_functions_qualify.sql +++ b/tests/queries/0_stateless/03095_window_functions_qualify.sql @@ -27,10 +27,10 @@ SELECT '--'; EXPLAIN header = 1, actions = 1 SELECT number, COUNT() OVER (PARTITION BY number % 3) AS partition_count FROM numbers(10) QUALIFY COUNT() OVER (PARTITION BY number % 3) = 4 ORDER BY number; -SELECT number % toUInt256(2) AS key, count() FROM numbers(10) GROUP BY key WITH CUBE WITH TOTALS QUALIFY key = toNullable(toNullable(0)); -- { serverError 48 } +SELECT number % toUInt256(2) AS key, count() FROM numbers(10) GROUP BY key WITH CUBE WITH TOTALS QUALIFY key = toNullable(toNullable(0)); -- { serverError NOT_IMPLEMENTED } -SELECT number % 2 AS key, count(materialize(5)) IGNORE NULLS FROM numbers(10) WHERE toLowCardinality(toLowCardinality(materialize(2))) GROUP BY key WITH CUBE WITH TOTALS QUALIFY key = 0; -- { serverError 48 } +SELECT number % 2 AS key, count(materialize(5)) IGNORE NULLS FROM numbers(10) WHERE toLowCardinality(toLowCardinality(materialize(2))) GROUP BY key WITH CUBE WITH TOTALS QUALIFY key = 0; -- { serverError NOT_IMPLEMENTED } -SELECT 4, count(4) IGNORE NULLS, number % 2 AS key FROM numbers(10) GROUP BY key WITH ROLLUP WITH TOTALS QUALIFY key = materialize(0); -- { serverError 48 } +SELECT 4, count(4) IGNORE NULLS, number % 2 AS key FROM numbers(10) GROUP BY key WITH ROLLUP WITH TOTALS QUALIFY key = materialize(0); -- { serverError NOT_IMPLEMENTED } -SELECT 3, number % toLowCardinality(2) AS key, count() IGNORE NULLS FROM numbers(10) GROUP BY key WITH ROLLUP WITH TOTALS QUALIFY key = 0; -- { serverError 48 } +SELECT 3, number % toLowCardinality(2) AS key, count() IGNORE NULLS FROM numbers(10) GROUP BY key WITH ROLLUP WITH TOTALS QUALIFY key = 0; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql b/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql index 0f4a217a4ae..b1ddd141e04 100644 --- a/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql +++ b/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql @@ -1,8 +1,8 @@ set allow_experimental_analyzer = true; -select count; -- { serverError 47 } +select count; -- { serverError UNKNOWN_IDENTIFIER } -select conut(); -- { serverError 46 } +select conut(); -- { serverError UNKNOWN_FUNCTION } system flush logs; @@ -10,4 +10,4 @@ select count() > 0 from system.text_log where message_format_string = 'Peak memo select count() > 0 from system.text_log where level = 'Error' and message_format_string = 'Unknown {}{} identifier \'{}\' in scope {}{}' and value1 = 'expression' and value3 = 'count' and value4 = 'SELECT count'; -select count() > 0 from system.text_log where level = 'Error' and 
message_format_string = 'Function with name \'{}\' does not exists. In scope {}{}' and value1 = 'conut' and value2 = 'SELECT conut()' and value3 ilike '%\'count\'%'; +select count() > 0 from system.text_log where level = 'Error' and message_format_string = 'Function with name \'{}\' does not exist. In scope {}{}' and value1 = 'conut' and value2 = 'SELECT conut()' and value3 ilike '%\'count\'%'; diff --git a/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql b/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql index 8cb477ea6bf..77a0f040e88 100644 --- a/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql +++ b/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql @@ -39,7 +39,7 @@ SELECT * GROUP BY *; -- not ok as every component of ORDER BY may contain ASC/DESC and COLLATE; though can be supported in some sense -- but it works SELECT * ORDER BY *; -SELECT * WHERE *; -- { serverError UNSUPPORTED_METHOD } +SELECT * WHERE *; -- { serverError BAD_ARGUMENTS } SELECT '---'; diff --git a/tests/queries/0_stateless/03130_analyzer_array_join_prefer_column.reference b/tests/queries/0_stateless/03130_analyzer_array_join_prefer_column.reference new file mode 100644 index 00000000000..c64254b157c --- /dev/null +++ b/tests/queries/0_stateless/03130_analyzer_array_join_prefer_column.reference @@ -0,0 +1,2 @@ +0 UInt64 +0 UInt64 diff --git a/tests/queries/0_stateless/03130_analyzer_array_join_prefer_column.sql b/tests/queries/0_stateless/03130_analyzer_array_join_prefer_column.sql new file mode 100644 index 00000000000..1f7bc9f9df1 --- /dev/null +++ b/tests/queries/0_stateless/03130_analyzer_array_join_prefer_column.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (`id` UInt64, `value` String, `value_array` Array(UInt64)) ENGINE = MergeTree() ORDER BY id; +INSERT INTO test_table VALUES (0, 'aaa', [0]), (1, 'bbb', [1]), (2, 'ccc', [2]); + + +SELECT materialize(id), toTypeName(id) +FROM ( SELECT 'aaa' ) AS subquery +ARRAY JOIN [0] AS id +INNER JOIN test_table +USING (id) +; + +SELECT materialize(id), toTypeName(id) +FROM ( SELECT 'aaa' ) AS subquery +ARRAY JOIN [0] AS id +INNER JOIN test_table +USING (id) +SETTINGS prefer_column_name_to_alias = 1 +; diff --git a/tests/queries/0_stateless/03130_analyzer_self_join_group_by.reference b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.reference new file mode 100644 index 00000000000..095df5749cd --- /dev/null +++ b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.reference @@ -0,0 +1,6 @@ +1 +2 +3 +1 1 +2 2 +3 3 diff --git a/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql new file mode 100644 index 00000000000..66b6b99981b --- /dev/null +++ b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (x Int32) ENGINE = MergeTree ORDER BY x; +INSERT INTO t1 VALUES (1), (2), (3); + +SET allow_experimental_analyzer = 1; + +SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.number FROM numbers(10) as t1 JOIN numbers(10) as t2 ON t1.number = t2.number GROUP BY t1.number; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.a FROM (SELECT x as a FROM t1) as t1 JOIN (SELECT x as a FROM t1) as t2 ON t1.a = t2.a GROUP BY t1.a; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.a FROM (SELECT x as a FROM t1 UNION ALL SELECT x as a FROM t1) as t1 JOIN (SELECT x as a FROM t1 UNION ALL SELECT x as a 
FROM t1) as t2 ON t1.a = t2.a GROUP BY t1.a; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.number FROM numbers(10) JOIN numbers(10) as t2 ON number = t2.number GROUP BY number SETTINGS joined_subquery_requires_alias = 0; -- { serverError NOT_AN_AGGREGATE } + +SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t0.x = t2.x GROUP BY t0.x; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY x; -- { serverError NOT_AN_AGGREGATE } +SELECT t1.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE } +SELECT x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE } +SELECT x FROM t1 JOIN t1 as t2 USING (x) GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE } + +SELECT t1.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY x ORDER BY ALL; +SELECT x, sum(t2.x) FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x ORDER BY ALL; diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference new file mode 100644 index 00000000000..39669d21bee --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -0,0 +1,5 @@ +1 +0 +0 +1 +100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql new file mode 100644 index 00000000000..0717c81aa0d --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -0,0 +1,16 @@ +-- Test SQL function 'generateSnowflakeID' + +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero + +SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs +SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ +SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination + +SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT count(*) +FROM +( + SELECT DISTINCT generateSnowflakeID() + FROM numbers(100) +); diff --git a/tests/queries/0_stateless/03131_deprecated_functions.sql b/tests/queries/0_stateless/03131_deprecated_functions.sql index 35cfe648c00..acdf36a50da 100644 --- a/tests/queries/0_stateless/03131_deprecated_functions.sql +++ b/tests/queries/0_stateless/03131_deprecated_functions.sql @@ -1,10 +1,10 @@ -SELECT number, neighbor(number, 2) FROM system.numbers LIMIT 10; -- { serverError 721 } +SELECT number, neighbor(number, 2) FROM system.numbers LIMIT 10; -- { serverError DEPRECATED_FUNCTION } -SELECT runningDifference(number) FROM system.numbers LIMIT 10; -- { serverError 721 } +SELECT runningDifference(number) FROM system.numbers LIMIT 10; -- { serverError DEPRECATED_FUNCTION } -SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); -- { serverError 721 } +SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); -- { serverError DEPRECATED_FUNCTION } -SET allow_deprecated_functions=1; +SET allow_deprecated_error_prone_window_functions=1; SELECT number, neighbor(number, 2) FROM system.numbers LIMIT 10 FORMAT Null; diff --git a/tests/queries/0_stateless/03131_hilbert_coding.reference b/tests/queries/0_stateless/03131_hilbert_coding.reference new file mode 
100644 index 00000000000..bdb578483fa --- /dev/null +++ b/tests/queries/0_stateless/03131_hilbert_coding.reference @@ -0,0 +1,8 @@ +----- START ----- +----- CONST ----- +133 +31 +(3,4) +----- 4294967296, 2 ----- +----- ERRORS ----- +----- END ----- diff --git a/tests/queries/0_stateless/03131_hilbert_coding.sql b/tests/queries/0_stateless/03131_hilbert_coding.sql new file mode 100644 index 00000000000..ed293dc6910 --- /dev/null +++ b/tests/queries/0_stateless/03131_hilbert_coding.sql @@ -0,0 +1,55 @@ +SELECT '----- START -----'; +drop table if exists hilbert_numbers_03131; +create table hilbert_numbers_03131( + n1 UInt32, + n2 UInt32 +) + Engine=MergeTree() + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; + +SELECT '----- CONST -----'; +select hilbertEncode(133); +select hilbertEncode(3, 4); +select hilbertDecode(2, 31); + +SELECT '----- 4294967296, 2 -----'; +insert into hilbert_numbers_03131 +select n1.number, n2.number +from numbers(pow(2, 32)-8,8) n1 + cross join numbers(pow(2, 32)-8, 8) n2 +; + +drop table if exists hilbert_numbers_1_03131; +create table hilbert_numbers_1_03131( + n1 UInt64, + n2 UInt64 +) + Engine=MergeTree() + ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; + +insert into hilbert_numbers_1_03131 +select untuple(hilbertDecode(2, hilbertEncode(n1, n2))) +from hilbert_numbers_03131; + +( + select n1, n2 from hilbert_numbers_03131 + union distinct + select n1, n2 from hilbert_numbers_1_03131 +) +except +( + select n1, n2 from hilbert_numbers_03131 + intersect + select n1, n2 from hilbert_numbers_1_03131 +); +drop table if exists hilbert_numbers_1_03131; + +select '----- ERRORS -----'; +select hilbertEncode(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } +select hilbertDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select hilbertEncode('text'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select hilbertDecode('text', 'text'); -- { serverError ILLEGAL_COLUMN } +select hilbertEncode((1, 2), 3); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT '----- END -----'; +drop table if exists hilbert_numbers_03131; diff --git a/tests/queries/0_stateless/03134_positional_arguments.reference b/tests/queries/0_stateless/03134_positional_arguments.reference new file mode 100644 index 00000000000..f1cd5725b24 --- /dev/null +++ b/tests/queries/0_stateless/03134_positional_arguments.reference @@ -0,0 +1,16 @@ +1 +2 +3 +Hello +1 +2 +is not supported +Use one of the following commands +1 +2 +3 +Hello +1 +2 +3 +Hello diff --git a/tests/queries/0_stateless/03134_positional_arguments.sh b/tests/queries/0_stateless/03134_positional_arguments.sh new file mode 100755 index 00000000000..437c8226010 --- /dev/null +++ b/tests/queries/0_stateless/03134_positional_arguments.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# Checks that "clickhouse-client/local --help" prints a brief summary of CLI arguments and "--help --verbose" prints all possible CLI arguments +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# The best way to write the query parameter, explicit long option. +${CLICKHOUSE_BINARY} --query "SELECT 1" + +# Shorthand option: +${CLICKHOUSE_BINARY} -q "SELECT 2" + +# It is also accepted as a positional argument +${CLICKHOUSE_BINARY} "SELECT 3" + +# The positional argument can go after normal arguments. 
+${CLICKHOUSE_BINARY} --param_test Hello "SELECT {test:String}" + +# This is ambiguous: currently works, but does not have to. +${CLICKHOUSE_BINARY} --query "SELECT 1" "SELECT 2" + +# Multiple positional arguments are not allowed. +${CLICKHOUSE_BINARY} "SELECT 1" "SELECT 2" 2>&1 | grep -o -F 'is not supported' + +# This is ambiguous - in case of a single word, it can be confused with a tool name. +${CLICKHOUSE_BINARY} "SELECT" 2>&1 | grep -o -F 'Use one of the following commands' + +# Everything works with clickhouse/ch/chl and also in clickhouse-local and clickhouse-client. + +${CLICKHOUSE_LOCAL} --query "SELECT 1" +${CLICKHOUSE_LOCAL} -q "SELECT 2" +${CLICKHOUSE_LOCAL} "SELECT 3" +${CLICKHOUSE_LOCAL} --param_test Hello "SELECT {test:String}" + +${CLICKHOUSE_CLIENT_BINARY} --query "SELECT 1" +${CLICKHOUSE_CLIENT_BINARY} -q "SELECT 2" +${CLICKHOUSE_CLIENT_BINARY} "SELECT 3" +${CLICKHOUSE_CLIENT_BINARY} --param_test Hello "SELECT {test:String}" diff --git a/tests/queries/0_stateless/03135_keeper_client_find_commands.reference b/tests/queries/0_stateless/03135_keeper_client_find_commands.reference new file mode 100644 index 00000000000..3a0f080674f --- /dev/null +++ b/tests/queries/0_stateless/03135_keeper_client_find_commands.reference @@ -0,0 +1,7 @@ +find_super_nodes +/test-keeper-client-default/1 4 +/test-keeper-client-default/1/d 3 +find_big_family +/test-keeper-client-default 10 +/test-keeper-client-default/1 9 +/test-keeper-client-default/1/d 4 diff --git a/tests/queries/0_stateless/03135_keeper_client_find_commands.sh b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh new file mode 100755 index 00000000000..0f57694028d --- /dev/null +++ b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +path="/test-keeper-client-$CLICKHOUSE_DATABASE" + +$CLICKHOUSE_KEEPER_CLIENT -q "rm $path" >& /dev/null + +$CLICKHOUSE_KEEPER_CLIENT -q "create $path 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a/a 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/b 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/c 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/a 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/b 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/c 'foobar'" + +echo 'find_super_nodes' +$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 1000000000" +$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 3 $path" | sort + +echo 'find_big_family' +$CLICKHOUSE_KEEPER_CLIENT -q "find_big_family $path 3" + +$CLICKHOUSE_KEEPER_CLIENT -q "rmr $path" diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference new file mode 100644 index 00000000000..864f62d3113 --- /dev/null +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference @@ -0,0 +1,5 @@ + ┏━━━┓ + ┃ x ┃ + ┡━━━┩ +1. 
│ █ │ + └───┘ diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql new file mode 100644 index 00000000000..e37b0db08e9 --- /dev/null +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql @@ -0,0 +1 @@ +SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 0) AS x FORMAT Pretty; diff --git a/tests/queries/0_stateless/03143_benchmark_query_id_prefix.reference b/tests/queries/0_stateless/03143_benchmark_query_id_prefix.reference new file mode 100644 index 00000000000..d7a23e16ea5 --- /dev/null +++ b/tests/queries/0_stateless/03143_benchmark_query_id_prefix.reference @@ -0,0 +1 @@ +0 100 diff --git a/tests/queries/0_stateless/03143_benchmark_query_id_prefix.sh b/tests/queries/0_stateless/03143_benchmark_query_id_prefix.sh new file mode 100755 index 00000000000..ed68b443c9a --- /dev/null +++ b/tests/queries/0_stateless/03143_benchmark_query_id_prefix.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +query_id_prefix=${CLICKHOUSE_DATABASE}_test_benchmark +$CLICKHOUSE_BENCHMARK -i 100 -c 8 <<< "SELECT 1" --query_id_prefix $query_id_prefix 2>/dev/null + +$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT --query "SELECT countIf(query_id = '$query_id_prefix'), countIf(query_id LIKE '$query_id_prefix%') FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish'" diff --git a/tests/queries/0_stateless/03143_cte_scope.reference b/tests/queries/0_stateless/03143_cte_scope.reference new file mode 100644 index 00000000000..0e1e7dfa6be --- /dev/null +++ b/tests/queries/0_stateless/03143_cte_scope.reference @@ -0,0 +1,2 @@ +1 2 3 0.3 1 2 4 0.3 +5 6 7 0.4 5 6 8 0.4 diff --git a/tests/queries/0_stateless/03143_cte_scope.sql b/tests/queries/0_stateless/03143_cte_scope.sql new file mode 100644 index 00000000000..1b1d9444651 --- /dev/null +++ b/tests/queries/0_stateless/03143_cte_scope.sql @@ -0,0 +1,43 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/56287 +SET allow_experimental_analyzer = 1; +DROP TABLE IF EXISTS tmp_a; +DROP TABLE IF EXISTS tmp_b; + +CREATE TEMPORARY TABLE IF NOT EXISTS tmp_a +( + k1 Int32, + k2 Int32, + d1 Int32, + d2 Int32 +) ENGINE = Memory; +INSERT INTO tmp_a VALUES (1,2,3,4); +INSERT INTO tmp_a VALUES (5,6,7,8); + +CREATE TEMPORARY TABLE IF NOT EXISTS tmp_b ( + k1 Int32, + k2 Int32, + d0 Float64 +) ENGINE = Memory; +INSERT INTO tmp_b VALUES (1,2,0.3); +INSERT INTO tmp_b VALUES (5,6,0.4); + +SELECT tb1.*,tb2.* +FROM + ( + with tmp0 as (select k1,k2,d1 from tmp_a), + tmp_s as (select k1,k2,d0 from tmp_b), + tmp1 as (select tmp0.*,tmp_s.d0 from tmp0 left join tmp_s on tmp0.k1=tmp_s.k1 and tmp0.k2=tmp_s.k2) + select * from tmp1 + ) as tb1 + LEFT JOIN + ( + with tmp0 as (select k1,k2,d2 from tmp_a), + tmp_s as (select k1,k2,d0 from tmp_b), + tmp1 as (select tmp0.*,tmp_s.d0 from tmp0 left join tmp_s on tmp0.k1=tmp_s.k1 and tmp0.k2=tmp_s.k2) + select * from tmp1 + ) as tb2 + ON tb1.k1=tb2.k1 AND tb1.k2=tb2.k2 +ORDER BY k1; + +DROP TABLE IF EXISTS tmp_a; +DROP TABLE IF EXISTS tmp_b; diff --git a/tests/queries/0_stateless/03143_join_filter_push_down_filled_join_fix.reference b/tests/queries/0_stateless/03143_join_filter_push_down_filled_join_fix.reference new file mode 100644 index 00000000000..4f89085d5ff --- /dev/null +++ b/tests/queries/0_stateless/03143_join_filter_push_down_filled_join_fix.reference 
@@ -0,0 +1 @@ +1 1 1 test diff --git a/tests/queries/0_stateless/03143_join_filter_push_down_filled_join_fix.sql b/tests/queries/0_stateless/03143_join_filter_push_down_filled_join_fix.sql new file mode 100644 index 00000000000..fc816623bd4 --- /dev/null +++ b/tests/queries/0_stateless/03143_join_filter_push_down_filled_join_fix.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 +( + id UInt64, + external_id UInt64 +) +ENGINE = MergeTree +ORDER BY id; + +DROP TABLE IF EXISTS t2; +CREATE TABLE t2 +( + id UInt64, + name String +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO t1 VALUES (1, 1); + +INSERT INTO t2 VALUES (1, 'test'); + +DROP DICTIONARY IF EXISTS d2; +CREATE DICTIONARY d2 +( + id UInt64, + name String, +) +PRIMARY KEY id +SOURCE(CLICKHOUSE( + table t2)) +LIFETIME(MIN 600 MAX 900) +LAYOUT(HASHED()); + +SELECT + * +FROM + t1 + LEFT JOIN d2 ON d2.id = t1.external_id + WHERE t1.id = 1 +LIMIT 1; + +DROP DICTIONARY d2; +DROP TABLE t2; +DROP TABLE t1; diff --git a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference new file mode 100644 index 00000000000..9daeafb9864 --- /dev/null +++ b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference @@ -0,0 +1 @@ +test diff --git a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql new file mode 100644 index 00000000000..97ed29802c7 --- /dev/null +++ b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS mv_table; +DROP TABLE IF EXISTS null_table; + +SET cluster_for_parallel_replicas='parallel_replicas', max_parallel_replicas=4, allow_experimental_parallel_reading_from_replicas=1; +SET allow_experimental_analyzer=1; + +CREATE TABLE null_table (str String) ENGINE = Null; +CREATE MATERIALIZED VIEW mv_table (str String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03143_parallel_replicas_mat_view_bug', '{replica}') ORDER BY str AS SELECT str AS str FROM null_table; +INSERT INTO null_table VALUES ('test'); + +SELECT * FROM mv_table; diff --git a/tests/queries/0_stateless/03143_prewhere_profile_events.reference b/tests/queries/0_stateless/03143_prewhere_profile_events.reference new file mode 100644 index 00000000000..32c93b89dc5 --- /dev/null +++ b/tests/queries/0_stateless/03143_prewhere_profile_events.reference @@ -0,0 +1,4 @@ +52503 10000000 +52503 10052503 +26273 10000000 +0 10052503 diff --git a/tests/queries/0_stateless/03143_prewhere_profile_events.sh b/tests/queries/0_stateless/03143_prewhere_profile_events.sh new file mode 100755 index 00000000000..863fcc1fe01 --- /dev/null +++ b/tests/queries/0_stateless/03143_prewhere_profile_events.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# Tags: no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -nq " + DROP TABLE IF EXISTS t; + + CREATE TABLE t(a UInt32, b UInt32, c UInt32, d UInt32) ENGINE=MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part=1, min_rows_for_wide_part=1; + + INSERT INTO t SELECT number, number, number, number FROM numbers_mt(1e7); + + OPTIMIZE TABLE t FINAL; +" + +query_id_1=$RANDOM$RANDOM +query_id_2=$RANDOM$RANDOM +query_id_3=$RANDOM$RANDOM +query_id_4=$RANDOM$RANDOM + +client_opts=( + --max_block_size 65409 + --max_threads 8 +) + +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_1" -nq " + SELECT * + FROM t +PREWHERE (b % 8192) = 42 + WHERE c = 42 + FORMAT Null +" + +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_2" -nq " + SELECT * + FROM t +PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 + WHERE d = 42 + FORMAT Null +settings enable_multiple_prewhere_read_steps=1; +" + +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_3" -nq " + SELECT * + FROM t +PREWHERE (b % 8192) = 42 AND (c % 16384) = 42 + WHERE d = 42 + FORMAT Null +settings enable_multiple_prewhere_read_steps=0; +" + +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_4" -nq " + SELECT b, c + FROM t +PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 + FORMAT Null +settings enable_multiple_prewhere_read_steps=1; +" + +${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; + + -- 52503 which is 43 * number of granules, 10000000 + SELECT ProfileEvents['RowsReadByMainReader'], ProfileEvents['RowsReadByPrewhereReaders'] + FROM system.query_log + WHERE current_database=currentDatabase() AND query_id = '$query_id_1' and type = 'QueryFinish'; + + -- 52503, 10052503 which is the sum of 10000000 from the first prewhere step plus 52503 from the second + SELECT ProfileEvents['RowsReadByMainReader'], ProfileEvents['RowsReadByPrewhereReaders'] + FROM system.query_log + WHERE current_database=currentDatabase() AND query_id = '$query_id_2' and type = 'QueryFinish'; + + -- 26273 the same as query #1 but twice less data (43 * ceil((52503 / 43) / 2)), 10000000 + SELECT ProfileEvents['RowsReadByMainReader'], ProfileEvents['RowsReadByPrewhereReaders'] + FROM system.query_log + WHERE current_database=currentDatabase() AND query_id = '$query_id_3' and type = 'QueryFinish'; + + -- 0, 10052503 + SELECT ProfileEvents['RowsReadByMainReader'], ProfileEvents['RowsReadByPrewhereReaders'] + FROM system.query_log + WHERE current_database=currentDatabase() AND query_id = '$query_id_4' and type = 'QueryFinish'; +" diff --git a/tests/queries/0_stateless/03143_ttl_in_system_parts_columns_table.reference b/tests/queries/0_stateless/03143_ttl_in_system_parts_columns_table.reference new file mode 100644 index 00000000000..f358d128f8a --- /dev/null +++ b/tests/queries/0_stateless/03143_ttl_in_system_parts_columns_table.reference @@ -0,0 +1,4 @@ +all_1_1_0 timestamp DateTime \N \N +all_1_1_0 x UInt32 2100-02-01 00:00:00 2100-02-01 00:00:00 +all_1_1_0 y String 2100-01-02 00:00:00 2100-01-02 00:00:00 +all_1_1_0 z String \N \N diff --git a/tests/queries/0_stateless/03143_ttl_in_system_parts_columns_table.sql b/tests/queries/0_stateless/03143_ttl_in_system_parts_columns_table.sql new file mode 100644 index 00000000000..50adab2e9b0 --- /dev/null +++ b/tests/queries/0_stateless/03143_ttl_in_system_parts_columns_table.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS test_03143; + +CREATE TABLE test_03143 ( + timestamp DateTime, + x UInt32 TTL timestamp + INTERVAL 1 MONTH, + y String TTL timestamp + INTERVAL 1 DAY, + z String +) +ENGINE = MergeTree +ORDER BY 
tuple(); + + +INSERT INTO test_03143 VALUES ('2100-01-01', 123, 'Hello, world!', 'xxx yyy'); + +SELECT + name, + column, + type, + column_ttl_min, + column_ttl_max +FROM system.parts_columns +WHERE table = 'test_03143' and database = currentDatabase() +ORDER BY name, column; + +DROP TABLE IF EXISTS test_03143; diff --git a/tests/queries/0_stateless/03143_window_functions_qualify_validation.reference b/tests/queries/0_stateless/03143_window_functions_qualify_validation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03143_window_functions_qualify_validation.sql b/tests/queries/0_stateless/03143_window_functions_qualify_validation.sql new file mode 100644 index 00000000000..5adbe7ff2a7 --- /dev/null +++ b/tests/queries/0_stateless/03143_window_functions_qualify_validation.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS uk_price_paid; +CREATE TABLE uk_price_paid +( + `price` UInt32, + `date` Date, + `postcode1` LowCardinality(String), + `postcode2` LowCardinality(String), + `type` Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0), + `is_new` UInt8, + `duration` Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0), + `addr1` String, + `addr2` String, + `street` LowCardinality(String), + `locality` LowCardinality(String), + `town` LowCardinality(String), + `district` LowCardinality(String), + `county` LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2); + +SELECT count(), (quantile(0.9)(price) OVER ()) AS price_quantile FROM uk_price_paid WHERE toYear(date) = 2023 QUALIFY price > price_quantile; -- { serverError NOT_AN_AGGREGATE } + +SELECT count() FROM uk_price_paid WHERE toYear(date) = 2023 QUALIFY price > (quantile(0.9)(price) OVER ()); -- { serverError NOT_AN_AGGREGATE } + +DROP TABLE uk_price_paid; diff --git a/tests/queries/0_stateless/03144_aggregate_states_with_different_types.reference b/tests/queries/0_stateless/03144_aggregate_states_with_different_types.reference new file mode 100644 index 00000000000..1e94d71681e --- /dev/null +++ b/tests/queries/0_stateless/03144_aggregate_states_with_different_types.reference @@ -0,0 +1,2 @@ +28A42640E39BB93F016A919E241536D33F020000000000000002000000000000000000000000000000000000000000000000 8014AE47E17AA43F01676666666666F6BF020000000000000002000000000000000000000000000000000000000000000000 +1 0 diff --git a/tests/queries/0_stateless/03144_aggregate_states_with_different_types.sql b/tests/queries/0_stateless/03144_aggregate_states_with_different_types.sql new file mode 100644 index 00000000000..5bb4a8c9c9c --- /dev/null +++ b/tests/queries/0_stateless/03144_aggregate_states_with_different_types.sql @@ -0,0 +1,28 @@ +SET allow_experimental_analyzer = 1; + +select * APPLY hex +from ( + select ( + select stochasticLogisticRegressionState(0.1, 0., 5, 'SGD')(number, number) + from numbers(10) + ) as col1, + ( + select stochasticLinearRegressionState(0.1, 0., 5, 'SGD')(number, number) + from numbers(10) + ) as col2 +from numbers(1) +); + +SELECT * +FROM +( + SELECT + bitmapHasAny(bitmapBuild([toUInt8(1)]), + ( + SELECT groupBitmapState(toUInt8(1)) + )) has1, + bitmapHasAny(bitmapBuild([toUInt64(1)]), + ( + SELECT groupBitmapState(toUInt64(2)) + )) has2 +); diff --git a/tests/queries/0_stateless/03144_alter_column_and_read.reference b/tests/queries/0_stateless/03144_alter_column_and_read.reference new file mode 100644 index 00000000000..3e1271d6ed7 --- /dev/null +++ b/tests/queries/0_stateless/03144_alter_column_and_read.reference 
@@ -0,0 +1,10 @@ +0 0_42 +1 1_42 +2 2_42 +3 3_42 +4 4_42 +5 5_42 +6 6_42 +7 7_42 +8 8_42 +9 9_42 diff --git a/tests/queries/0_stateless/03144_alter_column_and_read.sql b/tests/queries/0_stateless/03144_alter_column_and_read.sql new file mode 100644 index 00000000000..d198c3447b1 --- /dev/null +++ b/tests/queries/0_stateless/03144_alter_column_and_read.sql @@ -0,0 +1,11 @@ +drop table if exists tab; +create table tab (x UInt32) engine = MergeTree order by tuple(); + +insert into tab select number from numbers(10); + +set alter_sync = 0; +alter table tab update x = x + sleepEachRow(0.1) where 1; +alter table tab modify column x String; +alter table tab add column y String default x || '_42'; + +select x, y from tab order by x; diff --git a/tests/queries/0_stateless/03144_compress_stdout.reference b/tests/queries/0_stateless/03144_compress_stdout.reference new file mode 100644 index 00000000000..6f51dfc24e1 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.reference @@ -0,0 +1,2 @@ +Hello, World! From client. +Hello, World! From local. diff --git a/tests/queries/0_stateless/03144_compress_stdout.sh b/tests/queries/0_stateless/03144_compress_stdout.sh new file mode 100755 index 00000000000..569754303a7 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM (SELECT 'Hello, World! From client.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client" + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz + +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! 
From local.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local" diff --git a/tests/queries/0_stateless/03144_fuzz_quoted_type_name.reference b/tests/queries/0_stateless/03144_fuzz_quoted_type_name.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03144_fuzz_quoted_type_name.sql b/tests/queries/0_stateless/03144_fuzz_quoted_type_name.sql new file mode 100644 index 00000000000..4f6cc6c86ba --- /dev/null +++ b/tests/queries/0_stateless/03144_fuzz_quoted_type_name.sql @@ -0,0 +1,7 @@ +create table t (x 123) engine Memory; -- { clientError SYNTAX_ERROR } +create table t (x `a.b`) engine Memory; -- { clientError SYNTAX_ERROR } +create table t (x Array(`a.b`)) engine Memory; -- { clientError SYNTAX_ERROR } + +create table t (x Array(`ab`)) engine Memory; -- { serverError UNKNOWN_TYPE } +create table t (x `ab`) engine Memory; -- { serverError UNKNOWN_TYPE } +create table t (x `Int64`) engine Memory; \ No newline at end of file diff --git a/tests/queries/0_stateless/03144_invalid_filter.reference b/tests/queries/0_stateless/03144_invalid_filter.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03144_invalid_filter.sql b/tests/queries/0_stateless/03144_invalid_filter.sql new file mode 100644 index 00000000000..deb8d7b96b3 --- /dev/null +++ b/tests/queries/0_stateless/03144_invalid_filter.sql @@ -0,0 +1,14 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/48049 +SET allow_experimental_analyzer = 1; + +CREATE TABLE test_table (`id` UInt64, `value` String) ENGINE = TinyLog() AS Select number, number::String from numbers(10); + +WITH CAST(tuple(1), 'Tuple (value UInt64)') AS compound_value +SELECT id, test_table.* APPLY x -> compound_value.* +FROM test_table +WHERE arrayMap(x -> toString(x) AS lambda, [NULL, 256, 257, NULL, NULL]) +SETTINGS convert_query_to_cnf = true, optimize_using_constraints = true, optimize_substitute_columns = true; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + +DESCRIBE TABLE (SELECT test_table.COLUMNS(id) FROM test_table WHERE '2147483647'); -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.reference b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.reference new file mode 100644 index 00000000000..ed135e928a9 --- /dev/null +++ b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.reference @@ -0,0 +1,11 @@ +Starting alters +Finishing alters +Equal number of columns +Replication did not hang: synced all replicas of concurrent_alter_add_drop_steroids_ +Consistency: 1 +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh new file mode 100755 index 00000000000..ea7bb8f7ad0 --- /dev/null +++ b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib + +REPLICAS=3 + +for i in $(seq $REPLICAS); do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_alter_add_drop_steroids_$i" +done + + +for i in $(seq $REPLICAS); do + $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_add_drop_steroids_$i (key UInt64, value0 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_add_drop_steroids_column', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue = 1000, number_of_free_entries_in_pool_to_execute_mutation = 0, max_replicated_merges_in_queue = 1000, index_granularity = 8192, index_granularity_bytes = '10Mi'" +done + +$CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_add_drop_steroids_1 SELECT number, number + 10 from numbers(100000)" + +for i in $(seq $REPLICAS); do + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_add_drop_steroids_$i" +done + + +function alter_thread() +{ + while true; do + REPLICA=$(($RANDOM % 3 + 1)) + ADD=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_steroids_$REPLICA ADD COLUMN value$ADD UInt32 DEFAULT 42 SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency + DROP=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_steroids_$REPLICA DROP COLUMN value$DROP SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency + sleep 0.$RANDOM + done +} + +function alter_thread_1() +{ + while true; do + REPLICA=$(($RANDOM % 3 + 1)) + ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alter_add_drop_steroids_1 MODIFY COLUMN value0 String SETTINGS mutations_sync = 0" + sleep 1.$RANDOM + ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alter_add_drop_steroids_1 MODIFY COLUMN value0 UInt8 SETTINGS mutations_sync = 0" + sleep 1.$RANDOM + done + +} + +function optimize_thread() +{ + while true; do + REPLICA=$(($RANDOM % 3 + 1)) + $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE concurrent_alter_add_drop_steroids_$REPLICA FINAL SETTINGS replication_alter_partitions_sync=0"; + sleep 0.$RANDOM + done +} + +function insert_thread() +{ + while true; do + REPLICA=$(($RANDOM % 3 + 1)) + $CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_add_drop_steroids_$REPLICA VALUES($RANDOM, 7)" + sleep 0.$RANDOM + done +} + +function select_thread() +{ + while true; do + REPLICA=$(($RANDOM % 3 + 1)) + $CLICKHOUSE_CLIENT --query "SELECT * FROM merge(currentDatabase(), 'concurrent_alter_add_drop_steroids_') FORMAT Null" + sleep 0.$RANDOM + done +} + + +echo "Starting alters" +export -f alter_thread; +export -f alter_thread_1; +export -f select_thread; +export -f optimize_thread; +export -f insert_thread; + + +TIMEOUT=30 + +# Sometimes we detach and attach tables +timeout $TIMEOUT bash -c alter_thread 2> /dev/null & +timeout $TIMEOUT bash -c alter_thread 2> /dev/null & +timeout $TIMEOUT bash -c alter_thread 2> /dev/null & + +timeout $TIMEOUT bash -c alter_thread_1 2> /dev/null & +timeout $TIMEOUT bash -c alter_thread_1 2> /dev/null & +timeout $TIMEOUT bash -c alter_thread_1 2> /dev/null & + +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & + +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout 
$TIMEOUT bash -c optimize_thread 2> /dev/null & + +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & + +wait + +echo "Finishing alters" + +columns1=$($CLICKHOUSE_CLIENT --query "select count() from system.columns where table='concurrent_alter_add_drop_steroids_1' and database='$CLICKHOUSE_DATABASE'" 2> /dev/null) +columns2=$($CLICKHOUSE_CLIENT --query "select count() from system.columns where table='concurrent_alter_add_drop_steroids_2' and database='$CLICKHOUSE_DATABASE'" 2> /dev/null) +columns3=$($CLICKHOUSE_CLIENT --query "select count() from system.columns where table='concurrent_alter_add_drop_steroids_3' and database='$CLICKHOUSE_DATABASE'" 2> /dev/null) + +while [ "$columns1" != "$columns2" ] || [ "$columns2" != "$columns3" ]; do + columns1=$($CLICKHOUSE_CLIENT --query "select count() from system.columns where table='concurrent_alter_add_drop_steroids_1' and database='$CLICKHOUSE_DATABASE'" 2> /dev/null) + columns2=$($CLICKHOUSE_CLIENT --query "select count() from system.columns where table='concurrent_alter_add_drop_steroids_2' and database='$CLICKHOUSE_DATABASE'" 2> /dev/null) + columns3=$($CLICKHOUSE_CLIENT --query "select count() from system.columns where table='concurrent_alter_add_drop_steroids_3' and database='$CLICKHOUSE_DATABASE'" 2> /dev/null) + + sleep 1 +done + +echo "Equal number of columns" + +# This alter will finish all previous, but replica 1 maybe still not up-to-date +while [[ $(timeout 120 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alter_add_drop_steroids_1 MODIFY COLUMN value0 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do + sleep 1 +done + +check_replication_consistency "concurrent_alter_add_drop_steroids_" "count(), sum(key), sum(cityHash64(value0))" + +for i in $(seq $REPLICAS); do + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_add_drop_steroids_$i" + $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_steroids_$i'" + $CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_steroids_$i'" + $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.replication_queue WHERE table = 'concurrent_alter_add_drop_steroids_$i'" + $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_add_drop_steroids_$i' and (type = 'ALTER_METADATA' or type = 'MUTATE_PART')" + + $CLICKHOUSE_CLIENT --query "DETACH TABLE concurrent_alter_add_drop_steroids_$i" + $CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_alter_add_drop_steroids_$i" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_alter_add_drop_steroids_$i" +done diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.reference b/tests/queries/0_stateless/03145_non_loaded_projection_backup.reference new file mode 100644 index 00000000000..a11ee210e62 --- /dev/null +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.reference @@ -0,0 +1,7 @@ +7 +Found unexpected projection directories: pp.proj +BACKUP_CREATED +RESTORED +7 +Found unexpected projection directories: pp.proj +0 diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh new file mode 100755 index 
00000000000..7df2118ad0c --- /dev/null +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists tp_1; +create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100); +system stop merges tp_1; +insert into tp_1 select number, number from numbers(3); + +set mutations_sync = 2; + +alter table tp_1 add projection pp (select x, count() group by x); +insert into tp_1 select number, number from numbers(4); +select count() from tp_1; + +-- Here we have a part with written projection pp +alter table tp_1 detach partition '0'; +-- Move part to detached +alter table tp_1 clear projection pp; +-- Remove projection from table metadata +alter table tp_1 drop projection pp; +-- Now, we don't load projection pp for attached part, but it is written on disk +alter table tp_1 attach partition '0'; +" + +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" + +backup_id="$CLICKHOUSE_TEST_UNIQUE_NAME" +$CLICKHOUSE_CLIENT -q " +backup table tp_1 to Disk('backups', '$backup_id'); +" | grep -o "BACKUP_CREATED" + +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +drop table tp_1; +restore table tp_1 from Disk('backups', '$backup_id'); +system stop merges tp_1; +" | grep -o "RESTORED" + +$CLICKHOUSE_CLIENT -q "select count() from tp_1;" +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +check table tp_1" +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +drop table tp_1" diff --git a/tests/queries/0_stateless/03145_unicode_quotes.reference b/tests/queries/0_stateless/03145_unicode_quotes.reference new file mode 100644 index 00000000000..8d33edf14e3 --- /dev/null +++ b/tests/queries/0_stateless/03145_unicode_quotes.reference @@ -0,0 +1,5 @@ +This is an example of using English-style Unicode single quotes. +Row 1: +────── +\c\\u\\\r\\\\l\\\\\y\\\\\\: This is \an \\example ‘of using English-style Unicode single quotes.\ +{"1":1} diff --git a/tests/queries/0_stateless/03145_unicode_quotes.sql b/tests/queries/0_stateless/03145_unicode_quotes.sql new file mode 100644 index 00000000000..34a465cd874 --- /dev/null +++ b/tests/queries/0_stateless/03145_unicode_quotes.sql @@ -0,0 +1,9 @@ +-- They work: +SELECT ‘This is an example of using English-style Unicode single quotes.’ AS “curly”; + +-- It is unspecified which escaping rules apply inside the literal in Unicode quotes, and currently none apply (similarly to heredocs) +-- This could be changed. 
+ +SELECT ‘This is \an \\example ‘of using English-style Unicode single quotes.\’ AS “\c\\u\\\r\\\\l\\\\\y\\\\\\” FORMAT Vertical; + +SELECT ‘’ = '' AS “1” FORMAT JSONLines; diff --git a/tests/queries/0_stateless/03146_bug47862.reference b/tests/queries/0_stateless/03146_bug47862.reference new file mode 100644 index 00000000000..bc6298e80ad --- /dev/null +++ b/tests/queries/0_stateless/03146_bug47862.reference @@ -0,0 +1 @@ +110 diff --git a/tests/queries/0_stateless/03146_bug47862.sql b/tests/queries/0_stateless/03146_bug47862.sql new file mode 100644 index 00000000000..918f2316bea --- /dev/null +++ b/tests/queries/0_stateless/03146_bug47862.sql @@ -0,0 +1,12 @@ +SELECT toInt64(lookup_res) AS cast_res +FROM ( + SELECT + indexOf(field_id, 10) AS val_idx, + ['110'][val_idx] AS lookup_res + FROM ( + SELECT arrayJoin([[10], [15]]) AS field_id + ) + WHERE val_idx != 0 +) +WHERE cast_res > 0 +SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03146_create_index_compatibility.reference b/tests/queries/0_stateless/03146_create_index_compatibility.reference new file mode 100644 index 00000000000..64f1d1382ee --- /dev/null +++ b/tests/queries/0_stateless/03146_create_index_compatibility.reference @@ -0,0 +1 @@ +CREATE TABLE default.t_index_3146\n(\n `a` UInt64,\n `b` UInt64,\n INDEX i1 a TYPE minmax GRANULARITY 1,\n INDEX i2 (a, b) TYPE minmax GRANULARITY 1,\n INDEX i3 (a, b) TYPE minmax GRANULARITY 1,\n INDEX i4 a TYPE minmax GRANULARITY 1\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/03146_create_index_compatibility.sql b/tests/queries/0_stateless/03146_create_index_compatibility.sql new file mode 100644 index 00000000000..ede5bc0567a --- /dev/null +++ b/tests/queries/0_stateless/03146_create_index_compatibility.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t_index_3146; + +CREATE TABLE t_index_3146 (a UInt64, b UInt64) ENGINE = MergeTree ORDER BY tuple(); + +SET allow_create_index_without_type = 1; + +CREATE INDEX i1 ON t_index_3146 (a) TYPE minmax; +CREATE INDEX i2 ON t_index_3146 (a, b) TYPE minmax; +CREATE INDEX i3 ON t_index_3146 (a DESC, b ASC) TYPE minmax; +CREATE INDEX i4 ON t_index_3146 a TYPE minmax; +CREATE INDEX i5 ON t_index_3146 (a); -- ignored +CREATE INDEX i6 ON t_index_3146 (a DESC, b ASC); -- ignored +CREATE INDEX i7 ON t_index_3146; -- { clientError SYNTAX_ERROR } +CREATE INDEX i8 ON t_index_3146 a, b TYPE minmax; -- { clientError SYNTAX_ERROR } + +SHOW CREATE TABLE t_index_3146; +DROP TABLE t_index_3146; diff --git a/tests/queries/0_stateless/03146_tpc_ds_grouping.reference b/tests/queries/0_stateless/03146_tpc_ds_grouping.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03146_tpc_ds_grouping.sql b/tests/queries/0_stateless/03146_tpc_ds_grouping.sql new file mode 100644 index 00000000000..f48c40e9bc4 --- /dev/null +++ b/tests/queries/0_stateless/03146_tpc_ds_grouping.sql @@ -0,0 +1,71 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/46335 +SET allow_experimental_analyzer = 1; +SELECT + key_a + key_b AS d, + rank() OVER () AS f +FROM + ( + SELECT + rand() % 10 AS key_a, + rand(1) % 5 AS key_b, + number + FROM numbers(100) + ) +GROUP BY + key_a, + key_b +WITH ROLLUP +ORDER BY multiIf(d = 0, key_a, NULL) ASC +FORMAT Null; + +SELECT + key_a + key_b AS d, + rank() OVER (PARTITION BY key_a + key_b) AS f +FROM + ( + SELECT + rand() % 10 AS key_a, + rand(1) % 5 AS key_b, + number + FROM numbers(100) + ) +GROUP BY + key_a, + key_b 
+WITH ROLLUP +ORDER BY multiIf(d = 0, key_a, NULL) ASC +FORMAT Null; + + +SELECT + grouping(key_a) + grouping(key_b) AS d, + rank() OVER (PARTITION BY grouping(key_a) + grouping(key_b), multiIf(grouping(key_b) = 0, key_a, NULL)) AS f +FROM + ( + SELECT + rand() % 10 AS key_a, + rand(1) % 5 AS key_b, + number + FROM numbers(100) + ) +GROUP BY + key_a, + key_b +WITH ROLLUP +ORDER BY multiIf(d = 0, key_a, NULL) ASC +FORMAT Null; + +SELECT grouping(key_a) + grouping(key_b) AS d +FROM + ( + SELECT + rand() % 10 AS key_a, + rand(toLowCardinality(1)) % 5 AS key_b, + number + FROM numbers(100) + ) +GROUP BY + key_a, + key_b +WITH ROLLUP +FORMAT Null; diff --git a/tests/queries/0_stateless/03147_datetime64_constant_index_analysis.reference b/tests/queries/0_stateless/03147_datetime64_constant_index_analysis.reference new file mode 100644 index 00000000000..9abcce11136 --- /dev/null +++ b/tests/queries/0_stateless/03147_datetime64_constant_index_analysis.reference @@ -0,0 +1,6 @@ +7385 +7385 +7385 +7385 +86401 +86401 diff --git a/tests/queries/0_stateless/03147_datetime64_constant_index_analysis.sql b/tests/queries/0_stateless/03147_datetime64_constant_index_analysis.sql new file mode 100644 index 00000000000..144478eb721 --- /dev/null +++ b/tests/queries/0_stateless/03147_datetime64_constant_index_analysis.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (d DateTime, PRIMARY KEY (d)); +INSERT INTO test SELECT toDateTime('2024-01-01') + number FROM numbers(1e6); +SET max_rows_to_read = 10000; +SELECT count() FROM test WHERE d <= '2024-01-01 02:03:04'; +SELECT count() FROM test WHERE d <= toDateTime('2024-01-01 02:03:04'); +SELECT count() FROM test WHERE d <= toDateTime64('2024-01-01 02:03:04', 0); +SELECT count() FROM test WHERE d <= toDateTime64('2024-01-01 02:03:04', 3); +SET max_rows_to_read = 100_000; +SELECT count() FROM test WHERE d <= '2024-01-02'; +SELECT count() FROM test WHERE d <= toDate('2024-01-02'); +DROP TABLE test; diff --git a/tests/queries/0_stateless/03147_parquet_memory_tracking.reference b/tests/queries/0_stateless/03147_parquet_memory_tracking.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/03147_parquet_memory_tracking.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03147_parquet_memory_tracking.sql b/tests/queries/0_stateless/03147_parquet_memory_tracking.sql new file mode 100644 index 00000000000..aeca04ffb9d --- /dev/null +++ b/tests/queries/0_stateless/03147_parquet_memory_tracking.sql @@ -0,0 +1,13 @@ +-- Tags: no-fasttest, no-parallel + +-- Create an ~80 MB parquet file with one row group and one column. +insert into function file('03147_parquet_memory_tracking.parquet') select number from numbers(10000000) settings output_format_parquet_compression_method='none', output_format_parquet_row_group_size=1000000000000, engine_file_truncate_on_insert=1; + +-- Try to read it with 60 MB memory limit. Should fail because we read the 80 MB column all at once. +select sum(ignore(*)) from file('03147_parquet_memory_tracking.parquet') settings max_memory_usage=60000000; -- { serverError CANNOT_ALLOCATE_MEMORY } + +-- Try to read it with 500 MB memory limit, just in case. +select sum(ignore(*)) from file('03147_parquet_memory_tracking.parquet') settings max_memory_usage=500000000; + +-- Truncate the file to avoid leaving too much garbage behind. 
+insert into function file('03147_parquet_memory_tracking.parquet') select number from numbers(1) settings engine_file_truncate_on_insert=1; diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.reference b/tests/queries/0_stateless/03147_system_columns_access_checks.reference new file mode 100644 index 00000000000..35438f11b31 --- /dev/null +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.reference @@ -0,0 +1,2 @@ +........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ +end diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh new file mode 100755 index 00000000000..b027ea28504 --- /dev/null +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-ordinary-database, long, no-debug, no-asan, no-tsan, no-msan + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Create many tables in the database +NUM_TABLES=1000 +NUM_COLUMNS=1000 +THREADS=$(nproc) + +COLUMNS=$(seq 1 $NUM_COLUMNS | sed -r -e 's/(.+)/c\1 UInt8, /' | tr -d '\n') + +seq 1 $NUM_TABLES | xargs -P "${THREADS}" -I{} bash -c " + echo -n '.' + $CLICKHOUSE_CLIENT --query 'CREATE OR REPLACE TABLE test{} (${COLUMNS} end String) ENGINE = Memory' +" +echo + +$CLICKHOUSE_CLIENT --multiquery " +DROP USER IF EXISTS test_03147; +CREATE USER test_03147; +GRANT SELECT (end) ON ${CLICKHOUSE_DATABASE}.test1 TO test_03147; +" + +# This query was slow in previous ClickHouse versions for several reasons: +# - tables and databases without SHOW TABLES access were still checked for SHOW COLUMNS access for every column in every table; +# - excessive logging of "access granted" and "access denied" + +# The test could succeed even on the previous version, but it will show up as being too slow. 
+$CLICKHOUSE_CLIENT --user test_03147 --query "SELECT name FROM system.columns WHERE database = currentDatabase()" + +$CLICKHOUSE_CLIENT --multiquery " +DROP USER test_03147; +" diff --git a/tests/queries/0_stateless/03147_table_function_loop.reference b/tests/queries/0_stateless/03147_table_function_loop.reference new file mode 100644 index 00000000000..46a2310b65f --- /dev/null +++ b/tests/queries/0_stateless/03147_table_function_loop.reference @@ -0,0 +1,65 @@ +0 +1 +2 +0 +1 +2 +0 +1 +2 +0 +0 +1 +2 +0 +1 +2 +0 +1 +2 +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/03147_table_function_loop.sql b/tests/queries/0_stateless/03147_table_function_loop.sql new file mode 100644 index 00000000000..aa3c8e2def5 --- /dev/null +++ b/tests/queries/0_stateless/03147_table_function_loop.sql @@ -0,0 +1,16 @@ +-- Tags: no-parallel + +SELECT * FROM loop(numbers(3)) LIMIT 10; +SELECT * FROM loop (numbers(3)) LIMIT 10 settings max_block_size = 1; + +DROP DATABASE IF EXISTS 03147_db; +CREATE DATABASE IF NOT EXISTS 03147_db; +CREATE TABLE 03147_db.t (n Int8) ENGINE=MergeTree ORDER BY n; +INSERT INTO 03147_db.t SELECT * FROM numbers(10); +USE 03147_db; + +SELECT * FROM loop(03147_db.t) LIMIT 15; +SELECT * FROM loop(t) LIMIT 15; +SELECT * FROM loop(03147_db, t) LIMIT 15; + +SELECT * FROM loop('', '') -- { serverError UNKNOWN_TABLE } diff --git a/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.reference b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.reference new file mode 100644 index 00000000000..685d28268f6 --- /dev/null +++ b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.reference @@ -0,0 +1,44 @@ + +system.asynchronous_insert_log +Row 1: +────── +database: default +table: async_insert_landing +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values +format: Values +error: DB::Exc*****on: Cannot parse string 'Invalid' as UInt32: +populated_flush_query_id: 1 +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 + +Row 2: +────── +type: QueryFinish +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 diff --git a/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh new file mode 100755 index 00000000000..9c290133bf9 --- /dev/null +++ b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +function print_flush_query_logs() +{ + ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + echo "" + + echo "system.asynchronous_insert_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + database, + table, + query, + format, + extract(replace(exception::String, 'Exception', 'Exc*****on'), '.*UInt32:') as error, + not empty(flush_query_id) as populated_flush_query_id + FROM system.asynchronous_insert_log + WHERE + event_date >= yesterday() + AND query_id = '$1' + AND database = currentDatabase() + FORMAT Vertical" + + echo "system.query_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + replace(type::String, 'Exception', 'Exc*****on') as type, + read_rows, + read_bytes, + written_rows, + written_bytes, + result_rows, + result_bytes, + query, + query_kind, + databases, + tables, + columns, + views, + exception_code + FROM system.query_log + WHERE + event_date >= yesterday() + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') + -- AND current_database = currentDatabase() -- Just to silence style check: this is not ok for this test since the query uses default values + ORDER BY type DESC + FORMAT Vertical" +} + + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_insert_landing (id UInt32) ENGINE = MergeTree ORDER BY id" + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=0, async_insert=1 values ('Invalid')" 2>/dev/null || true +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" +print_flush_query_logs ${query_id} diff --git a/tests/queries/0_stateless/03148_mutations_virtual_columns.reference b/tests/queries/0_stateless/03148_mutations_virtual_columns.reference new file mode 100644 index 00000000000..7c5e8041147 --- /dev/null +++ b/tests/queries/0_stateless/03148_mutations_virtual_columns.reference @@ -0,0 +1 @@ +2 all_2_2_0 diff --git a/tests/queries/0_stateless/03148_mutations_virtual_columns.sql b/tests/queries/0_stateless/03148_mutations_virtual_columns.sql new file mode 100644 index 00000000000..045869b224a --- /dev/null +++ b/tests/queries/0_stateless/03148_mutations_virtual_columns.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS t_mut_virtuals; + +CREATE TABLE t_mut_virtuals (id UInt64, s String) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_mut_virtuals VALUES (1, 'a'); +INSERT INTO t_mut_virtuals VALUES (2, 'b'); + +SET insert_keeper_fault_injection_probability = 0; +SET mutations_sync = 2; + +ALTER TABLE t_mut_virtuals UPDATE s = _part WHERE 1; +ALTER TABLE t_mut_virtuals DELETE WHERE _part LIKE 'all_1_1_0%'; + +SELECT * FROM t_mut_virtuals ORDER BY id; + +DROP TABLE t_mut_virtuals; diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference new file mode 100644 index 00000000000..4fa3a14e63f --- /dev/null +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference @@ -0,0 +1,4 @@ +simple_with_analyzer ['default.03148_dictionary'] +nested_with_analyzer ['default.03148_dictionary'] +simple_without_analyzer ['default.03148_dictionary'] +nested_without_analyzer ['default.03148_dictionary'] diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql new file mode 100644 index 00000000000..1b647a7ee62 --- /dev/null +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql @@ -0,0 +1,84 @@ +DROP 
DICTIONARY IF EXISTS 03148_dictionary; + +CREATE DICTIONARY 03148_dictionary ( + id UInt64, + name String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE( + QUERY 'select 0 as id, ''name0'' as name' +)) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED); + +SELECT + dictGet('03148_dictionary', 'name', number) as dict_value +FROM numbers(1) +SETTINGS + allow_experimental_analyzer = 1, + log_comment = 'simple_with_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, used_dictionaries +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'simple_with_analyzer'; + +SELECT * +FROM ( + SELECT + dictGet('03148_dictionary', 'name', number) as dict_value + FROM numbers(1) +) t +SETTINGS + allow_experimental_analyzer = 1, + log_comment = 'nested_with_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, used_dictionaries +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'nested_with_analyzer'; + +SELECT + dictGet('03148_dictionary', 'name', number) as dict_value +FROM numbers(1) +SETTINGS + allow_experimental_analyzer = 0, + log_comment = 'simple_without_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, used_dictionaries +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'simple_without_analyzer'; + +SELECT * +FROM ( + SELECT + dictGet('03148_dictionary', 'name', number) as dict_value + FROM numbers(1) +) t +SETTINGS + allow_experimental_analyzer = 0, + log_comment = 'nested_without_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, used_dictionaries +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'nested_without_analyzer'; + +DROP DICTIONARY IF EXISTS 03148_dictionary; diff --git a/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.reference b/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.sql b/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.sql new file mode 100644 index 00000000000..af326c15bd8 --- /dev/null +++ b/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, 'Value_0'); + +SELECT * FROM test_table SETTINGS max_threads = 1025, max_streams_to_max_threads_ratio = -9223372036854775808, allow_experimental_analyzer = 1; -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT * FROM test_table SETTINGS max_threads = 1025, max_streams_to_max_threads_ratio = -9223372036854775808, allow_experimental_analyzer = 0; -- { serverError PARAMETER_OUT_OF_BOUND } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/03149_analyzer_window_redefinition.reference b/tests/queries/0_stateless/03149_analyzer_window_redefinition.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03149_analyzer_window_redefinition.sql b/tests/queries/0_stateless/03149_analyzer_window_redefinition.sql new file mode 100644 index 00000000000..7bc5ec7579c --- /dev/null +++ 
b/tests/queries/0_stateless/03149_analyzer_window_redefinition.sql @@ -0,0 +1,8 @@ +CREATE TABLE users (uid Int16, name String, age Int16) ENGINE=MergeTree ORDER BY tuple(); + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +SELECT count(*) OVER w +FROM users WINDOW w AS (ORDER BY uid), w AS(ORDER BY name); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh new file mode 100755 index 00000000000..6f70a0d2536 --- /dev/null +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | grep -q "Sanity check: 'max_block_size' cannot be 0. Set to default value" && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/03149_variant_pop_back_typo.reference b/tests/queries/0_stateless/03149_variant_pop_back_typo.reference new file mode 100644 index 00000000000..d60a254418f --- /dev/null +++ b/tests/queries/0_stateless/03149_variant_pop_back_typo.reference @@ -0,0 +1 @@ +[{1:[],'':1},{'':1,1:'',1:1}] diff --git a/tests/queries/0_stateless/03149_variant_pop_back_typo.sql b/tests/queries/0_stateless/03149_variant_pop_back_typo.sql new file mode 100644 index 00000000000..c35a7b708c6 --- /dev/null +++ b/tests/queries/0_stateless/03149_variant_pop_back_typo.sql @@ -0,0 +1 @@ +select [map(1, [], '', 1), map('', 1, 1, '', toUInt128(1), 1)] settings allow_experimental_variant_type=1, use_variant_as_common_type=1 \ No newline at end of file diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference new file mode 100644 index 00000000000..0b76d30953e --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference @@ -0,0 +1,35 @@ +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +3 1 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 Decimal(18, 3) +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql new file mode 100644 index 00000000000..ad5ea9512c6 --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql @@ -0,0 +1,34 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE null_table +( + n1 UInt8, + n2 Dynamic(max_types=3) +) +ENGINE = Null; + +CREATE MATERIALIZED VIEW dummy_rmv TO to_table +AS SELECT * FROM null_table; + +CREATE 
TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=4) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=10); +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; diff --git a/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.reference b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.reference new file mode 100644 index 00000000000..209c455b6f5 --- /dev/null +++ b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.reference @@ -0,0 +1,14 @@ +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 +2023-01-05 +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 10 +2023-01-05 10 +2023-01-05 hello, world +2023-01-05 \N diff --git a/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.sql b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.sql new file mode 100644 index 00000000000..a0bd1381351 --- /dev/null +++ b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS test_grouping_sets_predicate; + +CREATE TABLE test_grouping_sets_predicate ( day_ Date, type_1 String ) ENGINE=MergeTree ORDER BY day_; + +INSERT INTO test_grouping_sets_predicate SELECT toDate('2023-01-05') AS day_, 'hello, world' FROM numbers (10); + +SET group_by_use_nulls = true; + +SELECT * +FROM ( SELECT day_, type_1 FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY ALL; + + +SELECT * +FROM ( SELECT * FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY ALL; + +SELECT * +FROM ( SELECT day_ FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY * +SETTINGS allow_experimental_analyzer=1; + +SELECT * +FROM ( SELECT * FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +GROUP BY * +ORDER BY ALL +SETTINGS allow_experimental_analyzer=1; + +SELECT * +FROM ( SELECT * FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (*), (day_) ) ) +WHERE day_ = '2023-01-05' +GROUP BY GROUPING SETS (*) +ORDER BY type_1 +SETTINGS allow_experimental_analyzer=1; + +SELECT * +FROM ( SELECT day_, COUNT(*) FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY ALL; + + +SELECT t2.* +FROM ( SELECT t1.* FROM test_grouping_sets_predicate t1 GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) t2 +WHERE day_ = '2023-01-05' +ORDER BY ALL; diff --git a/tests/queries/0_stateless/03150_trace_log_add_build_id.reference 
b/tests/queries/0_stateless/03150_trace_log_add_build_id.reference new file mode 100644 index 00000000000..0d66ea1aee9 --- /dev/null +++ b/tests/queries/0_stateless/03150_trace_log_add_build_id.reference @@ -0,0 +1,2 @@ +0 +1 diff --git a/tests/queries/0_stateless/03150_trace_log_add_build_id.sql b/tests/queries/0_stateless/03150_trace_log_add_build_id.sql new file mode 100644 index 00000000000..75122de47b5 --- /dev/null +++ b/tests/queries/0_stateless/03150_trace_log_add_build_id.sql @@ -0,0 +1,10 @@ +-- Tags: no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +SET log_queries = 1; +SET log_query_threads = 1; +SET query_profiler_real_time_period_ns = 100000000; +SELECT sleep(1); +SYSTEM FLUSH LOGS; + +SELECT COUNT(*) > 1 FROM system.trace_log WHERE build_id IS NOT NULL; + diff --git a/tests/queries/0_stateless/03150_url_hash_non_constant_level.reference b/tests/queries/0_stateless/03150_url_hash_non_constant_level.reference new file mode 100644 index 00000000000..21809584f4f --- /dev/null +++ b/tests/queries/0_stateless/03150_url_hash_non_constant_level.reference @@ -0,0 +1,35 @@ +6957009580723342023 +12997754005206516270 +18162116495741479957 +7346166079942937387 +8792485758641698305 +16989076615821526161 +11160318154034397263 +11160318154034397263 +11160318154034397263 +11160318154034397263 +--- +6957009580723342023 +12997754005206516270 +18162116495741479957 +7346166079942937387 +8792485758641698305 +16989076615821526161 +11160318154034397263 +11160318154034397263 +11160318154034397263 +11160318154034397263 +--- +6957009580723342023 https://www3.botinok.co.edu.il/ +12997754005206516270 https://www3.botinok.co.edu.il/~kozlevich/ +18162116495741479957 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/ +7346166079942937387 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL? +8792485758641698305 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE# +16989076615821526161 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE#!PGNUM=99 +--- +6957009580723342023 https://www3.botinok.co.edu.il/ +12997754005206516270 https://www3.botinok.co.edu.il/~kozlevich/ +18162116495741479957 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/ +7346166079942937387 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL? +8792485758641698305 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE# +16989076615821526161 https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE#!PGNUM=99 diff --git a/tests/queries/0_stateless/03150_url_hash_non_constant_level.sql b/tests/queries/0_stateless/03150_url_hash_non_constant_level.sql new file mode 100644 index 00000000000..8afda0f9cfb --- /dev/null +++ b/tests/queries/0_stateless/03150_url_hash_non_constant_level.sql @@ -0,0 +1,7 @@ +WITH 'https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE#!PGNUM=99' AS url SELECT URLHash(url, arrayJoin(range(10))); +SELECT '---'; +WITH 'https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE#!PGNUM=99' AS url SELECT URLHash(materialize(url), arrayJoin(range(10))); +SELECT '---'; +WITH 'https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE#!PGNUM=99' AS url SELECT cityHash64(substring(x, -1, 1) IN ('/', '?', '#') ? 
substring(x, 1, -1) : x), arrayJoin(URLHierarchy(url)) AS x; +SELECT '---'; +WITH 'https://www3.botinok.co.edu.il/~kozlevich/CGI-BIN/WEBSIT~0.DLL?longptr=0xFFFFFFFF&ONERR=CONTINUE#!PGNUM=99' AS url SELECT cityHash64(substring(x, -1, 1) IN ('/', '?', '#') ? substring(x, 1, -1) : x), arrayJoin(URLHierarchy(materialize(url))) AS x; diff --git a/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.reference b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.reference new file mode 100644 index 00000000000..0ec4e34ebfe --- /dev/null +++ b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.reference @@ -0,0 +1,8 @@ +Expression ((Project names + Projection)) +Header: sum(id) UInt64 + Aggregating + Header: sum(__table1.id) UInt64 + Expression ((Before GROUP BY + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) + Header: __table1.id UInt64 + ReadFromPreparedSource (Read from NullSource) + Header: id UInt64 diff --git a/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.sql b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.sql new file mode 100644 index 00000000000..40204b5cd03 --- /dev/null +++ b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +DROP VIEW IF EXISTS test_view; +CREATE VIEW test_view AS SELECT id, value FROM test_table; + +EXPLAIN header = 1 SELECT sum(id) FROM test_view settings allow_experimental_analyzer=1; + +DROP VIEW test_view; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference new file mode 100644 index 00000000000..d96fbf658d8 --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference @@ -0,0 +1,26 @@ +1 2024-01-01 Date +2 1704056400 String +3 1 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 String +3 1 Float32 +3 1 String +4 2 Float64 +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql new file mode 100644 index 00000000000..632f3504fdb --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql @@ -0,0 +1,26 @@ +SET allow_experimental_dynamic_type=1; +set min_compress_block_size = 585572, max_compress_block_size = 373374, max_block_size = 60768, max_joined_block_size_rows = 18966, max_insert_threads = 5, max_threads = 50, max_read_buffer_size = 708232, connect_timeout_with_failover_ms = 2000, connect_timeout_with_failover_secure_ms = 3000, idle_connection_timeout = 36000, use_uncompressed_cache = true, stream_like_engine_allow_direct_select = true, replication_wait_for_inactive_replica_timeout = 30, compile_aggregate_expressions = false, min_count_to_compile_aggregate_expression = 0, 
compile_sort_description = false, group_by_two_level_threshold = 1000000, group_by_two_level_threshold_bytes = 12610083, enable_memory_bound_merging_of_aggregation_results = false, min_chunk_bytes_for_parallel_parsing = 18769830, merge_tree_coarse_index_granularity = 12, min_bytes_to_use_direct_io = 10737418240, min_bytes_to_use_mmap_io = 10737418240, log_queries = true, insert_quorum_timeout = 60000, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.05000000074505806, http_response_buffer_size = 294986, fsync_metadata = true, http_send_timeout = 60., http_receive_timeout = 60., opentelemetry_start_trace_probability = 0.10000000149011612, max_bytes_before_external_group_by = 1, max_bytes_before_external_sort = 10737418240, max_bytes_before_remerge_sort = 1326536545, max_untracked_memory = 1048576, memory_profiler_step = 1048576, log_comment = '03151_dynamic_type_scale_max_types.sql', send_logs_level = 'fatal', prefer_localhost_replica = false, optimize_read_in_order = false, optimize_aggregation_in_order = true, aggregation_in_order_max_block_bytes = 27069500, read_in_order_two_level_merge_threshold = 75, allow_introspection_functions = true, database_atomic_wait_for_drop_and_detach_synchronously = true, remote_filesystem_read_method = 'read', local_filesystem_read_prefetch = true, remote_filesystem_read_prefetch = false, merge_tree_compact_parts_min_granules_to_multibuffer_read = 119, async_insert_busy_timeout_max_ms = 5000, read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true, filesystem_cache_segments_batch_size = 10, use_page_cache_for_disks_without_file_cache = true, page_cache_inject_eviction = true, allow_prefetched_read_pool_for_remote_filesystem = false, filesystem_prefetch_step_marks = 50, filesystem_prefetch_min_bytes_for_single_read_task = 16777216, filesystem_prefetch_max_memory_usage = 134217728, filesystem_prefetches_limit = 10, optimize_sorting_by_input_stream_properties = false, allow_experimental_dynamic_type = true, session_timezone = 'Africa/Khartoum', prefer_warmed_unmerged_parts_seconds = 2; + +drop table if exists to_table; + +CREATE TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=2) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=5); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=500); -- { serverError UNEXPECTED_AST_STRUCTURE } diff --git a/tests/queries/0_stateless/03151_external_cross_join.reference b/tests/queries/0_stateless/03151_external_cross_join.reference new file mode 100644 index 00000000000..057eadec0e4 --- /dev/null +++ b/tests/queries/0_stateless/03151_external_cross_join.reference @@ -0,0 +1,6 @@ +1 1 2 +10 55 11 +100 5050 101 +1000 500500 1001 +10000 50005000 10001 +100000 5000050000 100001 diff --git 
a/tests/queries/0_stateless/03151_external_cross_join.sql b/tests/queries/0_stateless/03151_external_cross_join.sql new file mode 100644 index 00000000000..e0e05a10e1e --- /dev/null +++ b/tests/queries/0_stateless/03151_external_cross_join.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (x Int32) ENGINE = Memory; + +-- insert several blocks with 1 or 2 rows: +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (10),(100); +INSERT INTO t1 VALUES (1000); +INSERT INTO t1 VALUES (10000),(100000); + +SET max_rows_in_join = 111; + +SELECT x, sum(number), count(), FROM ( + SELECT t1.x, t2.number + FROM t1 + CROSS JOIN numbers_mt(10_000_000) t2 + WHERE number <= x +) +GROUP BY ALL +ORDER BY x +; \ No newline at end of file diff --git a/tests/queries/0_stateless/03151_pmj_join_non_procssed_clash.reference b/tests/queries/0_stateless/03151_pmj_join_non_procssed_clash.reference new file mode 100644 index 00000000000..3fbea507f11 --- /dev/null +++ b/tests/queries/0_stateless/03151_pmj_join_non_procssed_clash.reference @@ -0,0 +1 @@ +9900 49990050 49990050 49990050 diff --git a/tests/queries/0_stateless/03151_pmj_join_non_procssed_clash.sql b/tests/queries/0_stateless/03151_pmj_join_non_procssed_clash.sql new file mode 100644 index 00000000000..a54de889760 --- /dev/null +++ b/tests/queries/0_stateless/03151_pmj_join_non_procssed_clash.sql @@ -0,0 +1,8 @@ +SET join_algorithm = 'partial_merge'; +SET max_joined_block_size_rows = 100; + + +SELECT count(ignore(*)), sum(t1.a), sum(t1.b), sum(t2.a) +FROM ( SELECT number AS a, number AS b FROM numbers(10000) ) t1 +JOIN ( SELECT number + 100 AS a FROM numbers(10000) ) t2 +ON t1.a = t2.a AND t1.b > 0; diff --git a/tests/queries/0_stateless/03151_redundant_distinct_with_window.reference b/tests/queries/0_stateless/03151_redundant_distinct_with_window.reference new file mode 100644 index 00000000000..e321055f1e2 --- /dev/null +++ b/tests/queries/0_stateless/03151_redundant_distinct_with_window.reference @@ -0,0 +1,7 @@ +1 +2 +3 +-------- +1 2023-01-14 00:00:00 +2 2023-01-14 00:00:00 +3 2023-01-14 00:00:00 diff --git a/tests/queries/0_stateless/03151_redundant_distinct_with_window.sql b/tests/queries/0_stateless/03151_redundant_distinct_with_window.sql new file mode 100644 index 00000000000..79e0074e91b --- /dev/null +++ b/tests/queries/0_stateless/03151_redundant_distinct_with_window.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS tab; +DROP TABLE IF EXISTS tab_v; + +CREATE TABLE tab (id Int32, val Nullable(Float64), dt Nullable(DateTime64(6)), type Nullable(Int32)) ENGINE = MergeTree ORDER BY id; + +insert into tab values (1,10,'2023-01-14 00:00:00',1),(2,20,'2023-01-14 00:00:00',1),(3,20,'2023-01-14 00:00:00',2),(4,40,'2023-01-14 00:00:00',3),(5,50,'2023-01-14 00:00:00',3); + +CREATE VIEW tab_v AS SELECT + t1.type AS type, + sum(t1.val) AS sval, + toStartOfDay(t1.dt) AS sday, + anyLast(sval) OVER w AS lval +FROM tab AS t1 +GROUP BY + type, + sday +WINDOW w AS (PARTITION BY type); + +select distinct type from tab_v order by type; +select '--------'; +select distinct type, sday from tab_v order by type, sday; diff --git a/tests/queries/0_stateless/03151_unload_index_race.reference b/tests/queries/0_stateless/03151_unload_index_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03151_unload_index_race.sh b/tests/queries/0_stateless/03151_unload_index_race.sh new file mode 100755 index 00000000000..7e9dfa7cddc --- /dev/null +++ b/tests/queries/0_stateless/03151_unload_index_race.sh @@ -0,0 +1,75 @@ 
+#!/usr/bin/env bash +# Tags: no-fasttest, long, no-parallel +# Disable parallel since it creates 10 different threads querying and might overload the server + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "create table t(a UInt32, b UInt32, c UInt32) engine=MergeTree order by (a, b, c) settings index_granularity=1;" +$CLICKHOUSE_CLIENT -q "system stop merges t;" + +# In this part a only changes 10% of the time, b 50% of the time, c all the time +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);" + +# In this part a only changes 33% of the time, b 50% of the time, c 33% of the time +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);" + +# In this part a changes 100% of the time +$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);" + + +# In this part a never changes (it is always 0), b changes 10% of the time, c 50% of the time +$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);" +$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);" + +function thread_alter_settings() +{ + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -n --query "ALTER TABLE t MODIFY SETTING primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns=0.$RANDOM" + $CLICKHOUSE_CLIENT -n --query "SYSTEM UNLOAD PRIMARY KEY t" + sleep 0.0$RANDOM + done +} + +function thread_query_table() +{ + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + COUNT=$($CLICKHOUSE_CLIENT -n --query "SELECT count() FROM t where not ignore(*);") + if [ "$COUNT" -ne "2000" ]; then + echo "$COUNT" + fi + done +} + +export -f
thread_alter_settings +export -f thread_query_table + +TIMEOUT=10 + +thread_alter_settings $TIMEOUT & +for _ in $(seq 1 10); +do + thread_query_table $TIMEOUT & +done + +wait + +$CLICKHOUSE_CLIENT -q "SELECT count() FROM t FORMAT Null" diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.reference b/tests/queries/0_stateless/03152_analyzer_columns_list.reference new file mode 100644 index 00000000000..4e9025b5baf --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.reference @@ -0,0 +1 @@ +4 3 diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql new file mode 100644 index 00000000000..baed3a4ff68 --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -0,0 +1,13 @@ +CREATE TABLE test +( + foo String, + bar String, +) +ENGINE = MergeTree() +ORDER BY (foo, bar); + +INSERT INTO test VALUES ('foo', 'bar1'); + +SELECT COLUMNS(bar, foo) APPLY (length) FROM test; + +SELECT COLUMNS(bar, foo, xyz) APPLY (length) FROM test; -- { serverError UNKNOWN_IDENTIFIER } diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.reference b/tests/queries/0_stateless/03152_dynamic_type_simple.reference new file mode 100644 index 00000000000..5f243209ff3 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.reference @@ -0,0 +1,25 @@ +string1 String +42 Int64 +3.14 Float64 +[1,2] Array(Int64) +2021-01-01 Date +string2 String + +\N None 42 Int64 +42 Int64 string String +string String [1, 2] String +[1,2] Array(Int64) \N None + ┌─d────────────────────────┬─dynamicType(d)─┬─d.Int64─┬─d.String─┬─────d.Date─┬─d.Float64─┬──────────d.DateTime─┬─d.Array(Int64)─┬─d.Array(String)──────────┐ + 1. │ 42 │ Int64 │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 2. │ string1 │ String │ ᴺᵁᴸᴸ │ string1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 3. │ 2021-01-01 │ Date │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 4. │ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ [] │ + 5. │ 3.14 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 3.14 │ ᴺᵁᴸᴸ │ [] │ [] │ + 6. │ string2 │ String │ ᴺᵁᴸᴸ │ string2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 7. │ 2021-01-01 12:00:00 │ DateTime │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 12:00:00 │ [] │ [] │ + 8. │ ['array','of','strings'] │ Array(String) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ['array','of','strings'] │ + 9. │ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ +10. 
│ 42.42 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ [] │ + └──────────────────────────┴────────────────┴─────────┴──────────┴────────────┴───────────┴─────────────────────┴────────────────┴──────────────────────────┘ + +49995000 diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.sql b/tests/queries/0_stateless/03152_dynamic_type_simple.sql new file mode 100644 index 00000000000..fd5328faf15 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.sql @@ -0,0 +1,29 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_max_types (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO test_max_types VALUES ('string1'), (42), (3.14), ([1, 2]), (toDate('2021-01-01')), ('string2'); +SELECT d, dynamicType(d) FROM test_max_types; + +SELECT ''; +CREATE TABLE test_nested_dynamic (d1 Dynamic, d2 Dynamic(max_types=2)) ENGINE = Memory; +INSERT INTO test_nested_dynamic VALUES (NULL, 42), (42, 'string'), ('string', [1, 2]), ([1, 2], NULL); +SELECT d1, dynamicType(d1), d2, dynamicType(d2) FROM test_nested_dynamic; + +CREATE TABLE test_rapid_schema (d Dynamic) ENGINE = Memory; +INSERT INTO test_rapid_schema VALUES (42), ('string1'), (toDate('2021-01-01')), ([1, 2, 3]), (3.14), ('string2'), (toDateTime('2021-01-01 12:00:00')), (['array', 'of', 'strings']), (NULL), (toFloat64(42.42)); + +SELECT d, dynamicType(d), d.Int64, d.String, d.Date, d.Float64, d.DateTime, d.`Array(Int64)`, d.`Array(String)` +FROM test_rapid_schema FORMAT PrettyCompactMonoBlock; + + +SELECT ''; +SELECT finalizeAggregation(CAST(dynamic_state, 'AggregateFunction(sum, UInt64)')) +FROM +( + SELECT CAST(state, 'Dynamic') AS dynamic_state + FROM + ( + SELECT sumState(number) AS state + FROM numbers(10000) + ) +); diff --git a/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.reference b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.reference new file mode 100644 index 00000000000..7058d36aaf9 --- /dev/null +++ b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.reference @@ -0,0 +1,91 @@ +-- { echoOn } + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users INNER JOIN users2 USING name WHERE users.name ='Alice'; +Expression ((Project names + (Projection + ))) +Header: name String + Join (JOIN FillRightFirst) + Header: __table1.name String + Filter (( + Change column names to column identifiers)) + Header: __table1.name String + ReadFromMergeTree (default.users) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 + Filter (( + Change column names to column identifiers)) + Header: __table2.name String + ReadFromMergeTree (default.users2) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 +SELECT '--'; +-- +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users LEFT JOIN users2 USING name WHERE users.name ='Alice'; +Expression ((Project names + (Projection + ))) +Header: name String + Join (JOIN FillRightFirst) + Header: __table1.name String + Filter (( + Change column names to column identifiers)) + Header: __table1.name String + ReadFromMergeTree (default.users) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 + Filter (( + Change column names to column identifiers)) + Header: __table2.name String + ReadFromMergeTree (default.users2) + Header: name String + Indexes: + 
PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 +SELECT '--'; +-- +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users RIGHT JOIN users2 USING name WHERE users2.name ='Alice'; +Expression ((Project names + (Projection + ))) +Header: name String + Join (JOIN FillRightFirst) + Header: __table1.name String + __table2.name String + Filter (( + Change column names to column identifiers)) + Header: __table1.name String + ReadFromMergeTree (default.users) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 + Filter (( + Change column names to column identifiers)) + Header: __table2.name String + ReadFromMergeTree (default.users2) + Header: name String + Indexes: + PrimaryKey + Keys: + name + Condition: (name in [\'Alice\', \'Alice\']) + Parts: 1/3 + Granules: 1/3 diff --git a/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql new file mode 100644 index 00000000000..645e89034d7 --- /dev/null +++ b/tests/queries/0_stateless/03152_join_filter_push_down_equivalent_columns.sql @@ -0,0 +1,35 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS users; +CREATE TABLE users (uid Int16, name String, age Int16) ENGINE=MergeTree order by (uid, name); + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +DROP TABLE IF EXISTS users2; +CREATE TABLE users2 (uid Int16, name String, age2 Int16) ENGINE=MergeTree order by (uid, name); + +INSERT INTO users2 VALUES (1231, 'John', 33); +INSERT INTO users2 VALUES (6666, 'Ksenia', 48); +INSERT INTO users2 VALUES (8888, 'Alice', 50); + +-- { echoOn } + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users INNER JOIN users2 USING name WHERE users.name ='Alice'; + +SELECT '--'; + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users LEFT JOIN users2 USING name WHERE users.name ='Alice'; + +SELECT '--'; + +EXPLAIN header = 1, indexes = 1 +SELECT name FROM users RIGHT JOIN users2 USING name WHERE users2.name ='Alice'; + +-- { echoOff } + +DROP TABLE users; +DROP TABLE users2; diff --git a/tests/queries/0_stateless/03152_trailing_comma_in_columns_list_in_insert.reference b/tests/queries/0_stateless/03152_trailing_comma_in_columns_list_in_insert.reference new file mode 100644 index 00000000000..6622044ee6e --- /dev/null +++ b/tests/queries/0_stateless/03152_trailing_comma_in_columns_list_in_insert.reference @@ -0,0 +1,2 @@ +1 2 3 +4 5 6 diff --git a/tests/queries/0_stateless/03152_trailing_comma_in_columns_list_in_insert.sql b/tests/queries/0_stateless/03152_trailing_comma_in_columns_list_in_insert.sql new file mode 100644 index 00000000000..4031f9a7762 --- /dev/null +++ b/tests/queries/0_stateless/03152_trailing_comma_in_columns_list_in_insert.sql @@ -0,0 +1,4 @@ +CREATE TEMPORARY TABLE test (a UInt8, b UInt8, c UInt8); +INSERT INTO test (a, b, c, ) VALUES (1, 2, 3); +INSERT INTO test (a, b, c) VALUES (4, 5, 6); +SELECT * FROM test ORDER BY a; diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.reference b/tests/queries/0_stateless/03153_dynamic_type_empty.reference new file mode 100644 index 00000000000..f7c047dcd19 --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.reference @@ -0,0 +1,15 @@ +[] String +[1] Array(Int64) +[] Array(Int64) +['1'] Array(String) +[] Array(Int64) +() String +(1) 
Tuple(Int64) +(0) Tuple(Int64) +('1') Tuple(String) +(0) Tuple(Int64) +{} String +{1:2} Map(Int64, Int64) +{} Map(Int64, Int64) +{'1':'2'} Map(String, String) +{} Map(Int64, Int64) diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.sql b/tests/queries/0_stateless/03153_dynamic_type_empty.sql new file mode 100644 index 00000000000..8e942fe6f6e --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.sql @@ -0,0 +1,5 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_null_empty (d Dynamic) ENGINE = Memory; +INSERT INTO test_null_empty VALUES ([]), ([1]), ([]), (['1']), ([]), (()),((1)), (()), (('1')), (()), ({}), ({1:2}), ({}), ({'1':'2'}), ({}); +SELECT d, dynamicType(d) FROM test_null_empty; diff --git a/tests/queries/0_stateless/03153_format_regexp_usability.reference b/tests/queries/0_stateless/03153_format_regexp_usability.reference new file mode 100644 index 00000000000..c41b0adbd84 --- /dev/null +++ b/tests/queries/0_stateless/03153_format_regexp_usability.reference @@ -0,0 +1,2 @@ +regular expression is not set +`Upyachka` diff --git a/tests/queries/0_stateless/03153_format_regexp_usability.sh b/tests/queries/0_stateless/03153_format_regexp_usability.sh new file mode 100755 index 00000000000..03bed10dd17 --- /dev/null +++ b/tests/queries/0_stateless/03153_format_regexp_usability.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-ordinary-database, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --query "SELECT * FROM format(Regexp, 's String', 'Hello')" 2>&1 | grep -o -F 'regular expression is not set' +${CLICKHOUSE_LOCAL} --query "SELECT * FROM format(Regexp, 's String', 'Hello') SETTINGS format_regexp = 'Upyachka'" 2>&1 | grep -o -F '`Upyachka`' diff --git a/tests/queries/0_stateless/03153_trailing_comma_in_values_list_in_insert.reference b/tests/queries/0_stateless/03153_trailing_comma_in_values_list_in_insert.reference new file mode 100644 index 00000000000..e115855485d --- /dev/null +++ b/tests/queries/0_stateless/03153_trailing_comma_in_values_list_in_insert.reference @@ -0,0 +1,3 @@ +1 2 3 +4 5 6 +7 8 9 diff --git a/tests/queries/0_stateless/03153_trailing_comma_in_values_list_in_insert.sql b/tests/queries/0_stateless/03153_trailing_comma_in_values_list_in_insert.sql new file mode 100644 index 00000000000..65301c977c2 --- /dev/null +++ b/tests/queries/0_stateless/03153_trailing_comma_in_values_list_in_insert.sql @@ -0,0 +1,5 @@ +CREATE TEMPORARY TABLE test (a UInt8, b UInt8, c UInt8); +INSERT INTO test (a, b, c) VALUES (1, 2, 3, ); +INSERT INTO test (a, b, c) VALUES (4, 5, 6,); +INSERT INTO test (a, b, c) VALUES (7, 8, 9); +SELECT * FROM test ORDER BY a; diff --git a/tests/queries/0_stateless/03154_lazy_token_iterator.reference b/tests/queries/0_stateless/03154_lazy_token_iterator.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03154_lazy_token_iterator.sh b/tests/queries/0_stateless/03154_lazy_token_iterator.sh new file mode 100755 index 00000000000..4794dafda4b --- /dev/null +++ b/tests/queries/0_stateless/03154_lazy_token_iterator.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-ordinary-database, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# In previous versions this command took longer than ten minutes. 
Now it takes less than a second in release mode: + +python3 -c 'import sys; import struct; sys.stdout.buffer.write(b"".join(struct.pack(" diff --git a/tests/queries/0_stateless/03155_explain_current_transaction.sql b/tests/queries/0_stateless/03155_explain_current_transaction.sql new file mode 100644 index 00000000000..fa0fd06e798 --- /dev/null +++ b/tests/queries/0_stateless/03155_explain_current_transaction.sql @@ -0,0 +1 @@ +EXPLAIN CURRENT TRANSACTION; diff --git a/tests/queries/0_stateless/03155_in_nested_subselects.reference b/tests/queries/0_stateless/03155_in_nested_subselects.reference new file mode 100644 index 00000000000..5565ed6787f --- /dev/null +++ b/tests/queries/0_stateless/03155_in_nested_subselects.reference @@ -0,0 +1,4 @@ +0 +1 +0 +1 diff --git a/tests/queries/0_stateless/03155_in_nested_subselects.sql b/tests/queries/0_stateless/03155_in_nested_subselects.sql new file mode 100644 index 00000000000..4f5ccd30aa3 --- /dev/null +++ b/tests/queries/0_stateless/03155_in_nested_subselects.sql @@ -0,0 +1,19 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/63833 +SET allow_experimental_analyzer = 1; + +create table Example (id Int32) engine = MergeTree ORDER BY id; +INSERT INTO Example SELECT number AS id FROM numbers(2); + +create table Null engine=Null as Example ; +--create table Null engine=MergeTree order by id as Example ; + +create materialized view Transform to Example as +select * from Null +join ( select * FROM Example + WHERE id IN (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM Null))))) + ) as old +using id; + +INSERT INTO Null SELECT number AS id FROM numbers(2); + +select * from Example; -- should return 4 rows diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference new file mode 100644 index 00000000000..b5b2aec9c12 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference @@ -0,0 +1,12 @@ +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 1 +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 2 +Hello 1 +Hello 2 +Hello 1 +Hello 1 +Hello 2 +Hello 2 diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql new file mode 100644 index 00000000000..f605a369822 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql @@ -0,0 +1,10 @@ +CREATE TABLE arrays_test (s String, arr Array(UInt8)) ENGINE = MergeTree() ORDER BY (s); + +INSERT INTO arrays_test VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []); + +SELECT s, arr, a FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; +SELECT s, arr, a FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; + + +SELECT s, arr FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; +SELECT s, arr FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference new file mode 100644 index 00000000000..e1c7b69b136 --- /dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference @@ -0,0 +1,7 @@ +Array(UInt64) 12000 10000 +Date 12000 10001 +Float64 12000 10000 +Int64 10000 10000 
+Map(UInt64, String) 10000 10000 +String 10000 10000 +UInt64 4000 4000 diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh new file mode 100755 index 00000000000..d7709b722c9 --- /dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "CREATE TABLE test_cc (d Dynamic) ENGINE = Memory" + + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT number::Int64 AS d FROM numbers(10000) SETTINGS max_threads=1,max_insert_threads=1" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toString(number) AS d FROM numbers(10000) SETTINGS max_threads=2,max_insert_threads=2" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toDate(number % 10000) AS d FROM numbers(10000) SETTINGS max_threads=3,max_insert_threads=3" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT [number, number + 1] AS d FROM numbers(10000) SETTINGS max_threads=4,max_insert_threads=4" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toFloat64(number) AS d FROM numbers(10000) SETTINGS max_threads=5,max_insert_threads=5" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT map(number, toString(number)) AS d FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --use_variant_as_common_type=1 --allow_experimental_variant_type=1 -q "INSERT INTO test_cc SELECT CAST(multiIf(number % 5 = 0, toString(number), number % 5 = 1, number, number % 5 = 2, toFloat64(number), number % 5 = 3, toDate('2020-01-01'), [number, number + 1]), 'Dynamic') FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +wait + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "SELECT dynamicType(d) t, count(), uniqExact(d) FROM test_cc GROUP BY t ORDER BY t" diff --git a/tests/queries/0_stateless/03156_nullable_number_tips.reference b/tests/queries/0_stateless/03156_nullable_number_tips.reference new file mode 100644 index 00000000000..cb4e12684d8 --- /dev/null +++ b/tests/queries/0_stateless/03156_nullable_number_tips.reference @@ -0,0 +1,43 @@ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ +Nullable(UInt64), Nullable(size = 10, UInt64(size = 10), UInt8(size = 10)) + ┏━━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━━┩ +1. │ 1111111101 │ -- 1.11 billion + └────────────┘ + ┏━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━┩ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + x + +1. ᴺᵁᴸᴸ +UInt64, Sparse(size = 10, UInt64(size = 6), UInt64(size = 5)) + ┏━━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━━┩ +1. │ 1111111101 │ -- 1.11 billion + └────────────┘ + ┏━━━┓ + ┃ x ┃ + ┡━━━┩ +1. │ 0 │ + └───┘ + x + +1. 
0 diff --git a/tests/queries/0_stateless/03156_nullable_number_tips.sql b/tests/queries/0_stateless/03156_nullable_number_tips.sql new file mode 100644 index 00000000000..e6f2fa36d86 --- /dev/null +++ b/tests/queries/0_stateless/03156_nullable_number_tips.sql @@ -0,0 +1,24 @@ +SELECT 123456789 AS x FORMAT PrettyCompact; +SELECT toNullable(123456789) AS x FORMAT PrettyCompact; +SELECT toLowCardinality(toNullable(123456789)) AS x FORMAT PrettyCompact; +SELECT toNullable(toLowCardinality(123456789)) AS x FORMAT PrettyCompact; +SELECT toLowCardinality(123456789) AS x FORMAT PrettyCompact; + +CREATE TEMPORARY TABLE test (x Nullable(UInt64), PRIMARY KEY ()) ENGINE = MergeTree SETTINGS ratio_of_defaults_for_sparse_serialization = 0; +INSERT INTO test SELECT number % 2 ? number * 123456789 : NULL FROM numbers(10); + +SELECT DISTINCT dumpColumnStructure(*) FROM test; + +SELECT * FROM test ORDER BY ALL DESC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS FIRST LIMIT 1 FORMAT PrettySpace; + +DROP TEMPORARY TABLE test; +CREATE TEMPORARY TABLE test (x UInt64, PRIMARY KEY ()) ENGINE = MergeTree SETTINGS ratio_of_defaults_for_sparse_serialization = 0; +INSERT INTO test SELECT number % 2 ? number * 123456789 : NULL FROM numbers(10); + +SELECT DISTINCT dumpColumnStructure(*) FROM test; + +SELECT * FROM test ORDER BY ALL DESC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS FIRST LIMIT 1 FORMAT PrettySpace; diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference new file mode 100644 index 00000000000..5b2a36927ee --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference @@ -0,0 +1,6 @@ +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql new file mode 100644 index 00000000000..836b426a9a9 --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = '10G'; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; diff --git 
a/tests/queries/0_stateless/03157_dynamic_type_json.reference b/tests/queries/0_stateless/03157_dynamic_type_json.reference new file mode 100644 index 00000000000..38bca12bb95 --- /dev/null +++ b/tests/queries/0_stateless/03157_dynamic_type_json.reference @@ -0,0 +1,5 @@ +1 (((((((((('deep_value')))))))))) +2 (((((((((('deep_array_value')))))))))) + +(((((((((('deep_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) +(((((((((('deep_array_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.sql b/tests/queries/0_stateless/03157_dynamic_type_json.sql new file mode 100644 index 00000000000..cb1a5987104 --- /dev/null +++ b/tests/queries/0_stateless/03157_dynamic_type_json.sql @@ -0,0 +1,13 @@ +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_deep_nested_json (i UInt16, d JSON) ENGINE = Memory; + +INSERT INTO test_deep_nested_json VALUES (1, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_value"}}}}}}}}}}'); +INSERT INTO test_deep_nested_json VALUES (2, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_array_value"}}}}}}}}}}'); + +SELECT * FROM test_deep_nested_json ORDER BY i; + +SELECT ''; +SELECT d::Dynamic d1, dynamicType(d1) FROM test_deep_nested_json ORDER BY i; diff --git a/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql new file mode 100644 index 00000000000..ddf5185c945 --- /dev/null +++ b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql @@ -0,0 +1 @@ +SELECT 1 GROUP BY -9223372036854775808; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference new file mode 100644 index 00000000000..2ede006cedc --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference @@ -0,0 +1,17 @@ +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +true Variant(Bool, DateTime64(3), IPv6, String, UInt32) +2001-01-01 01:01:01.111 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +s Variant(Bool, DateTime64(3), IPv6, String, UInt32) +0 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +1 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +\N Variant(Bool, DateTime64(3), IPv6, String, UInt32) + +false Bool +false Bool +true Bool +2001-01-01 01:01:01.111 DateTime64(3) +s String +0 UInt32 +1 UInt32 +\N None diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql new file mode 100644 index 00000000000..20a9e17a148 --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql @@ -0,0 +1,15 @@ +SET allow_experimental_dynamic_type=1; +SET 
allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_variable (v Variant(String, UInt32, IPv6, Bool, DateTime64)) ENGINE = Memory; +CREATE TABLE test_dynamic (d Dynamic) ENGINE = Memory; + +INSERT INTO test_variable VALUES (1), ('s'), (0), ('0'), ('true'), ('false'), ('2001-01-01 01:01:01.111'), (NULL); + +SELECT v, toTypeName(v) FROM test_variable ORDER BY v; + +INSERT INTO test_dynamic SELECT * FROM test_variable; + +SELECT ''; +SELECT d, dynamicType(d) FROM test_dynamic ORDER BY d; diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference new file mode 100644 index 00000000000..72c5b90dbba --- /dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -0,0 +1,292 @@ +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 +Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 +Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 +Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Point (1.23,4.5600000000000005) +Ring 
[(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 18446744073709551615 +UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 +Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 +Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 +Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Point (1.23,4.5600000000000005) +Ring 
[(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 18446744073709551615 +UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +48 +48 diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql new file mode 100644 index 00000000000..d302205ca23 --- /dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -0,0 +1,95 @@ +-- Tags: no-random-settings + +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; +SET allow_suspicious_low_cardinality_types=1; + + +CREATE TABLE t (d Dynamic(max_types=255)) ENGINE = Memory; +-- Integer types: signed and unsigned integers (UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256) +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-32768::Int16), (-32767::Int16), (-1::Int16), (0::Int16), (1::Int16), (32766::Int16), (32767::Int16); +INSERT INTO t VALUES (-2147483648::Int32), (-2147483647::Int32), (-1::Int32), (0::Int32), (1::Int32), (2147483646::Int32), (2147483647::Int32); +INSERT INTO t VALUES (-9223372036854775808::Int64), (-9223372036854775807::Int64), (-1::Int64), (0::Int64), (1::Int64), (9223372036854775806::Int64), (9223372036854775807::Int64); +INSERT INTO t VALUES (-170141183460469231731687303715884105728::Int128), (-170141183460469231731687303715884105727::Int128), (-1::Int128), (0::Int128), (1::Int128), (170141183460469231731687303715884105726::Int128), (170141183460469231731687303715884105727::Int128); +INSERT INTO t VALUES (-57896044618658097711785492504343953926634992332820282019728792003956564819968::Int256), (-57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256), (-1::Int256), (0::Int256), (1::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819966::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256); + +INSERT INTO t VALUES (0::UInt8), (1::UInt8), (254::UInt8), (255::UInt8); +INSERT INTO t VALUES (0::UInt16), (1::UInt16), (65534::UInt16), (65535::UInt16); +INSERT INTO t VALUES (0::UInt32), (1::UInt32), (4294967294::UInt32), (4294967295::UInt32); +INSERT INTO t VALUES (0::UInt64), (1::UInt64), (18446744073709551614::UInt64), (18446744073709551615::UInt64); +INSERT INTO t VALUES (0::UInt128), (1::UInt128), (340282366920938463463374607431768211454::UInt128), (340282366920938463463374607431768211455::UInt128); +INSERT INTO t VALUES (0::UInt256), (1::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639934::UInt256), 
(115792089237316195423570985008687907853269984665640564039457584007913129639935::UInt256); + +-- Floating-point numbers: floats(Float32 and Float64) and Decimal values +INSERT INTO t VALUES (1.17549435e-38::Float32), (3.40282347e+38::Float32), (-3.40282347e+38::Float32), (-1.17549435e-38::Float32), (1.4e-45::Float32), (-1.4e-45::Float32); +INSERT INTO t VALUES (inf::Float32), (-inf::Float32), (nan::Float32); +INSERT INTO t VALUES (inf::FLOAT(12)), (-inf::FLOAT(12)), (nan::FLOAT(12)); +INSERT INTO t VALUES (inf::FLOAT(15,22)), (-inf::FLOAT(15,22)), (nan::FLOAT(15,22)); + +INSERT INTO t VALUES (1.17549435e-38::Float64), (3.40282347e+38::Float64), (-3.40282347e+38::Float64), (-1.17549435e-38::Float64), (1.4e-45::Float64), (-1.4e-45::Float64); +INSERT INTO t VALUES (2.2250738585072014e-308::Float64), (1.7976931348623157e+308::Float64), (-1.7976931348623157e+308::Float64), (-2.2250738585072014e-308::Float64); +INSERT INTO t VALUES (inf::Float64), (-inf::Float64), (nan::Float64); +INSERT INTO t VALUES (inf::DOUBLE(12)), (-inf::DOUBLE(12)), (nan::DOUBLE(12)); +INSERT INTO t VALUES (inf::DOUBLE(15,22)), (-inf::DOUBLE(15,22)), (nan::DOUBLE(15,22)); + +INSERT INTO t VALUES (-99999999.9::Decimal32(1)); +INSERT INTO t VALUES (-999999999.99::Decimal64(2)); +INSERT INTO t VALUES (-999999999.999::Decimal128(3)); +INSERT INTO t VALUES (-999999999.9999::Decimal256(4)); + +-- Strings: String and FixedString +INSERT INTO t VALUES ('string'::String), ('1'::FixedString(1)), ('1'::FixedString(2)), ('1'::FixedString(10)); --(''::String), + +-- Boolean +INSERT INTO t VALUES ('1'::Bool), (0::Bool); + +-- Dates: use Date and Date32 for days, and DateTime and DateTime64 for instances in time +INSERT INTO t VALUES ('2022-01-01'::Date), ('2022-01-01'::Date32), ('2022-01-01 01:01:01'::DateTime), ('2022-01-01 01:01:01.011'::DateTime64); + +-- UUID +INSERT INTO t VALUES ('dededdb6-7835-4ce4-8d11-b5de6f2820e9'::UUID); +INSERT INTO t VALUES ('00000000-0000-0000-0000-000000000000'::UUID); + +-- LowCardinality +INSERT INTO t VALUES ('1'::LowCardinality(String)), ('1'::LowCardinality(String)), (0::LowCardinality(UInt16)); + +-- Arrays +INSERT INTO t VALUES ([]::Array(Dynamic)), ([[]]::Array(Array(Dynamic))), ([[[]]]::Array(Array(Array(Dynamic)))); + +-- Tuple +INSERT INTO t VALUES (()::Tuple(Dynamic)), ((())::Tuple(Tuple(Dynamic))), (((()))::Tuple(Tuple(Tuple(Dynamic)))); + +-- Map. 
+INSERT INTO t VALUES (map(11::Dynamic, 'v1'::Dynamic, '22'::Dynamic, 1::Dynamic)); + +-- SimpleAggregateFunction +INSERT INTO t VALUES ([1,2]::SimpleAggregateFunction(anyLast, Array(Int16))); + +-- IPs +INSERT INTO t VALUES (toIPv4('192.168.0.1')), (toIPv6('::1')); + +-- Geo +INSERT INTO t VALUES ((1.23, 4.56)::Point), (([(1.23, 4.56)::Point, (2.34, 5.67)::Point])::Ring); +INSERT INTO t VALUES ([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]::MultiPolygon); + +-- Interval +INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' year); + +-- Nested +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); + +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; + +CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; +INSERT INTO t2 SELECT * FROM t; + +SELECT ''; +SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; + +SELECT ''; +SELECT uniqExact(dynamicType(d)) t_ FROM t; +SELECT uniqExact(dynamicType(d)) t_ FROM t2; diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.reference b/tests/queries/0_stateless/03160_dynamic_type_agg.reference new file mode 100644 index 00000000000..54f6e428839 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.reference @@ -0,0 +1 @@ +4950 4950 diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.sql b/tests/queries/0_stateless/03160_dynamic_type_agg.sql new file mode 100644 index 00000000000..f99232031a8 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.sql @@ -0,0 +1,10 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t SELECT sumState(number) AS d FROM numbers(100); + +SELECT finalizeAggregation(d.`AggregateFunction(sum, UInt64)`), + sumMerge(d.`AggregateFunction(sum, UInt64)`) +FROM t GROUP BY d.`AggregateFunction(sum, UInt64)`; + diff --git a/tests/queries/0_stateless/03160_pretty_format_tty.reference b/tests/queries/0_stateless/03160_pretty_format_tty.reference new file mode 100644 index 00000000000..6a5b453966d --- /dev/null +++ b/tests/queries/0_stateless/03160_pretty_format_tty.reference @@ -0,0 +1 @@ +100004 diff --git a/tests/queries/0_stateless/03160_pretty_format_tty.sh b/tests/queries/0_stateless/03160_pretty_format_tty.sh new file mode 100755 index 00000000000..bbc4b96eb90 --- /dev/null +++ b/tests/queries/0_stateless/03160_pretty_format_tty.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# default output_format_pretty_max_rows is 10K +$CLICKHOUSE_LOCAL -q "select * from numbers(100e3) format PrettySpace settings max_threads=1" | wc -l diff --git a/tests/queries/0_stateless/03161_cnf_reduction.reference b/tests/queries/0_stateless/03161_cnf_reduction.reference new file mode 100644 index 00000000000..5e39c0f3223 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.reference @@ -0,0 +1,23 @@ +-- Expected plan with analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1 + +-- Expected result with analyzer: +1 + +-- Expected plan w/o analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0 + +-- Expected result w/o analyzer: +1 + +-- Reproducer from the issue with analyzer +2 + +-- Reproducer from the issue w/o analyzer +2 diff --git a/tests/queries/0_stateless/03161_cnf_reduction.sql b/tests/queries/0_stateless/03161_cnf_reduction.sql new file mode 100644 index 00000000000..b34e9171d45 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.sql @@ -0,0 +1,72 @@ +DROP TABLE IF EXISTS 03161_table; + +CREATE TABLE 03161_table (id UInt32, f UInt8) ENGINE = Memory; + +INSERT INTO 03161_table VALUES (0, 0), (1, 1), (2, 0); + +SELECT '-- Expected plan with analyzer:'; + +EXPLAIN SYNTAX +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Expected result with analyzer:'; + +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Expected plan w/o analyzer:'; + +EXPLAIN SYNTAX +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +SELECT ''; + +SELECT '-- Expected result w/o analyzer:'; + +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +DROP TABLE IF EXISTS 03161_table; + +-- Checking reproducer from GitHub issue +-- https://github.com/ClickHouse/ClickHouse/issues/57400 + +DROP TABLE IF EXISTS 03161_reproducer; + +CREATE TABLE 03161_reproducer (c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8) ENGINE = Memory; + +INSERT INTO 03161_reproducer VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (0, 0, 0, 0, 0, 0, 0, 0, 1, 1), (0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (0, 0, 0, 0, 0, 0, 0, 1, 0, 1), (0, 0, 0, 0, 0, 0, 0, 1, 1, 0), (0, 0, 0, 0, 0, 0, 0, 1, 1, 1); + +SELECT ''; + +SELECT '-- Reproducer from the issue with analyzer'; + +SELECT count() +FROM 03161_reproducer +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Reproducer from the issue w/o analyzer'; + +SELECT count() +FROM 03161_reproducer +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS 
convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +DROP TABLE IF EXISTS 03161_reproducer; diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.reference b/tests/queries/0_stateless/03161_create_table_as_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.sql b/tests/queries/0_stateless/03161_create_table_as_mv.sql new file mode 100644 index 00000000000..e80659ac923 --- /dev/null +++ b/tests/queries/0_stateless/03161_create_table_as_mv.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS base_table; +DROP TABLE IF EXISTS target_table; +DROP TABLE IF EXISTS mv_from_base_to_target; +DROP TABLE IF EXISTS mv_with_storage; +DROP TABLE IF EXISTS other_table_1; +DROP TABLE IF EXISTS other_table_2; + +CREATE TABLE base_table (date DateTime, id String, cost Float64) ENGINE = MergeTree() ORDER BY date; +CREATE TABLE target_table (id String, total AggregateFunction(sum, Float64)) ENGINE = MergeTree() ORDER BY id; +CREATE MATERIALIZED VIEW mv_from_base_to_target TO target_table AS Select id, sumState(cost) FROM base_table GROUP BY id; +CREATE MATERIALIZED VIEW mv_with_storage ENGINE=MergeTree() ORDER BY id AS Select id, sumState(cost) FROM base_table GROUP BY id; + +CREATE TABLE other_table_1 AS mv_with_storage; +CREATE TABLE other_table_2 AS mv_from_base_to_target; -- { serverError INCORRECT_QUERY } diff --git a/tests/queries/0_stateless/03161_decimal_binary_math.reference b/tests/queries/0_stateless/03161_decimal_binary_math.reference new file mode 100644 index 00000000000..f7d9761c7c5 --- /dev/null +++ b/tests/queries/0_stateless/03161_decimal_binary_math.reference @@ -0,0 +1,75 @@ +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +42.4242 2.42 8686.104718 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +0.4242 0.24 0.514871 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 2.42 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +42.4242 2.42 42.4242 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 +0.4242 0.4242 0.599909 diff --git a/tests/queries/0_stateless/03161_decimal_binary_math.sql b/tests/queries/0_stateless/03161_decimal_binary_math.sql new file 
mode 100644 index 00000000000..5484cc6a9bb --- /dev/null +++ b/tests/queries/0_stateless/03161_decimal_binary_math.sql @@ -0,0 +1,79 @@ +SELECT toDecimal32('42.4242', 4) AS x, toDecimal32('2.42', 2) AS y, round(pow(x, y), 6); +SELECT toDecimal64('42.4242', 4) AS x, toDecimal32('2.42', 2) AS y, round(pow(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, toDecimal64('2.42', 2) AS y, round(pow(x, y), 6); +SELECT toDecimal64('42.4242', 4) AS x, toDecimal32('2.42', 2) AS y, round(pow(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(pow(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, toDecimal32('2.42', 2) AS y, round(pow(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(pow(x, y), 6); +SELECT 42.4242 AS x, toDecimal32('2.42', 2) AS y, round(pow(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, 2.42 AS y, round(pow(x, y), 6); +SELECT materialize(42.4242) AS x, toDecimal32('2.42', 2) AS y, round(pow(x, y), 6); +SELECT 42.4242 AS x, materialize(toDecimal32('2.42', 2)) AS y, round(pow(x, y), 6); +SELECT materialize(42.4242) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(pow(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, 2.42 AS y, round(pow(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, materialize(2.42) AS y, round(pow(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, materialize(2.42) AS y, round(pow(x, y), 6); + +SELECT toDecimal32('0.4242', 4) AS x, toDecimal32('0.24', 2) AS y, round(atan2(y, x), 6); +SELECT toDecimal64('0.4242', 4) AS x, toDecimal32('0.24', 2) AS y, round(atan2(y, x), 6); +SELECT toDecimal32('0.4242', 4) AS x, toDecimal64('0.24', 2) AS y, round(atan2(y, x), 6); +SELECT toDecimal64('0.4242', 4) AS x, toDecimal64('0.24', 2) AS y, round(atan2(y, x), 6); +SELECT toDecimal32('0.4242', 4) AS x, materialize(toDecimal32('0.24', 2)) AS y, round(atan2(y, x), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, toDecimal32('0.24', 2) AS y, round(atan2(y, x), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, materialize(toDecimal32('0.24', 2)) AS y, round(atan2(y, x), 6); +SELECT 0.4242 AS x, toDecimal32('0.24', 2) AS y, round(atan2(y, x), 6); +SELECT toDecimal32('0.4242', 4) AS x, 0.24 AS y, round(atan2(y, x), 6); +SELECT materialize(0.4242) AS x, toDecimal32('0.24', 2) AS y, round(atan2(y, x), 6); +SELECT 0.4242 AS x, materialize(toDecimal32('0.24', 2)) AS y, round(atan2(y, x), 6); +SELECT materialize(0.4242) AS x, materialize(toDecimal32('0.24', 2)) AS y, round(atan2(y, x), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, 0.24 AS y, round(atan2(y, x), 6); +SELECT toDecimal32('0.4242', 4) AS x, materialize(0.24) AS y, round(atan2(y, x), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, materialize(0.24) AS y, round(atan2(y, x), 6); + +SELECT toDecimal32('42.4242', 4) AS x, toDecimal32('2.42', 2) AS y, round(min2(x, y), 6); +SELECT toDecimal64('42.4242', 4) AS x, toDecimal32('2.42', 2) AS y, round(min2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, toDecimal64('2.42', 2) AS y, round(min2(x, y), 6); +SELECT toDecimal64('42.4242', 4) AS x, toDecimal64('2.42', 2) AS y, round(min2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(min2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, toDecimal32('2.42', 2) AS y, round(min2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, materialize(toDecimal32('2.42', 2)) AS y, 
round(min2(x, y), 6); +SELECT 42.4242 AS x, toDecimal32('2.42', 2) AS y, round(min2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, 2.42 AS y, round(min2(x, y), 6); +SELECT materialize(42.4242) AS x, toDecimal32('2.42', 2) AS y, round(min2(x, y), 6); +SELECT 42.4242 AS x, materialize(toDecimal32('2.42', 2)) AS y, round(min2(x, y), 6); +SELECT materialize(42.4242) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(min2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, 2.42 AS y, round(min2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, materialize(2.42) AS y, round(min2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, materialize(2.42) AS y, round(min2(x, y), 6); + +SELECT toDecimal32('42.4242', 4) AS x, toDecimal32('2.42', 2) AS y, round(max2(x, y), 6); +SELECT toDecimal64('42.4242', 4) AS x, toDecimal32('2.42', 2) AS y, round(max2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, toDecimal64('2.42', 2) AS y, round(max2(x, y), 6); +SELECT toDecimal64('42.4242', 4) AS x, toDecimal64('2.42', 2) AS y, round(max2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(max2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, toDecimal32('2.42', 2) AS y, round(max2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(max2(x, y), 6); +SELECT 42.4242 AS x, toDecimal32('2.42', 2) AS y, round(max2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, 2.42 AS y, round(max2(x, y), 6); +SELECT materialize(42.4242) AS x, toDecimal32('2.42', 2) AS y, round(max2(x, y), 6); +SELECT 42.4242 AS x, materialize(toDecimal32('2.42', 2)) AS y, round(max2(x, y), 6); +SELECT materialize(42.4242) AS x, materialize(toDecimal32('2.42', 2)) AS y, round(max2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, 2.42 AS y, round(max2(x, y), 6); +SELECT toDecimal32('42.4242', 4) AS x, materialize(2.42) AS y, round(max2(x, y), 6); +SELECT materialize(toDecimal32('42.4242', 4)) AS x, materialize(2.42) AS y, round(max2(x, y), 6); + +SELECT toDecimal32('0.4242', 4) AS x, toDecimal32('0.4242', 4) AS y, round(hypot(x, y), 6); +SELECT toDecimal64('0.4242', 4) AS x, toDecimal32('0.4242', 4) AS y, round(hypot(x, y), 6); +SELECT toDecimal32('0.4242', 4) AS x, toDecimal64('0.4242', 4) AS y, round(hypot(x, y), 6); +SELECT toDecimal64('0.4242', 4) AS x, toDecimal64('0.4242', 4) AS y, round(hypot(x, y), 6); +SELECT toDecimal32('0.4242', 4) AS x, materialize(toDecimal32('0.4242', 4)) AS y, round(hypot(x, y), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, toDecimal32('0.4242', 4) AS y, round(hypot(x, y), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, materialize(toDecimal32('0.4242', 4)) AS y, round(hypot(x, y), 6); +SELECT 0.4242 AS x, toDecimal32('0.4242', 4) AS y, round(hypot(x, y), 6); +SELECT toDecimal32('0.4242', 4) AS x, 0.4242 AS y, round(hypot(x, y), 6); +SELECT materialize(0.4242) AS x, toDecimal32('0.4242', 4) AS y, round(hypot(x, y), 6); +SELECT 0.4242 AS x, materialize(toDecimal32('0.4242', 4)) AS y, round(hypot(x, y), 6); +SELECT materialize(0.4242) AS x, materialize(toDecimal32('0.4242', 4)) AS y, round(hypot(x, y), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, 0.4242 AS y, round(hypot(x, y), 6); +SELECT toDecimal32('0.4242', 4) AS x, materialize(0.4242) AS y, round(hypot(x, y), 6); +SELECT materialize(toDecimal32('0.4242', 4)) AS x, materialize(0.4242) AS y, round(hypot(x, y), 6); diff --git 
a/tests/queries/0_stateless/03161_ipv4_ipv6_equality.reference b/tests/queries/0_stateless/03161_ipv4_ipv6_equality.reference new file mode 100644 index 00000000000..2a4cb2e658f --- /dev/null +++ b/tests/queries/0_stateless/03161_ipv4_ipv6_equality.reference @@ -0,0 +1,8 @@ +1 +1 +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/03161_ipv4_ipv6_equality.sql b/tests/queries/0_stateless/03161_ipv4_ipv6_equality.sql new file mode 100644 index 00000000000..da2a660977a --- /dev/null +++ b/tests/queries/0_stateless/03161_ipv4_ipv6_equality.sql @@ -0,0 +1,11 @@ +-- Equal +SELECT toIPv4('127.0.0.1') = toIPv6('::ffff:127.0.0.1'); +SELECT toIPv6('::ffff:127.0.0.1') = toIPv4('127.0.0.1'); + +-- Not equal +SELECT toIPv4('127.0.0.1') = toIPv6('::ffff:127.0.0.2'); +SELECT toIPv4('127.0.0.2') = toIPv6('::ffff:127.0.0.1'); +SELECT toIPv6('::ffff:127.0.0.1') = toIPv4('127.0.0.2'); +SELECT toIPv6('::ffff:127.0.0.2') = toIPv4('127.0.0.1'); +SELECT toIPv4('127.0.0.1') = toIPv6('::ffef:127.0.0.1'); +SELECT toIPv6('::ffef:127.0.0.1') = toIPv4('127.0.0.1'); \ No newline at end of file diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql new file mode 100644 index 00000000000..cd29fae8fd7 --- /dev/null +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -0,0 +1,15 @@ + +DROP TABLE IF EXISTS users; + +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p1 (select count(), age group by age) +) ENGINE = MergeTree order by uid; + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.reference b/tests/queries/0_stateless/03162_dynamic_type_nested.reference new file mode 100644 index 00000000000..8d5bcb5f85a --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.reference @@ -0,0 +1,4 @@ + ┌─dynamicType(d)──────────────┬─d─────────────────────────────────────────┬─d.Nested(x UInt32, y Dynamic).x─┬─d.Nested(x UInt32, y Dynamic).y───┬─dynamicType(arrayElement(d.Nested(x UInt32, y Dynamic).y, 1))─┬─d.Nested(x UInt32, y Dynamic).y.String─┬─d.Nested(x UInt32, y Dynamic).y.Tuple(Int64, Array(String))─┐ +1. │ Nested(x UInt32, y Dynamic) │ [(1,'aa'),(2,'bb')] │ [1,2] │ ['aa','bb'] │ String │ ['aa','bb'] │ [(0,[]),(0,[])] │ +2. 
│ Nested(x UInt32, y Dynamic) │ [(1,(2,['aa','bb'])),(5,(6,['ee','ff']))] │ [1,5] │ [(2,['aa','bb']),(6,['ee','ff'])] │ Tuple(Int64, Array(String)) │ [NULL,NULL] │ [(2,['aa','bb']),(6,['ee','ff'])] │ + └─────────────────────────────┴───────────────────────────────────────────┴─────────────────────────────────┴───────────────────────────────────┴───────────────────────────────────────────────────────────────┴────────────────────────────────────────┴─────────────────────────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.sql b/tests/queries/0_stateless/03162_dynamic_type_nested.sql new file mode 100644 index 00000000000..94007459a9e --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.sql @@ -0,0 +1,16 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y Dynamic)) ; +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb'])), (5, (6, ['ee', 'ff']))]::Nested(x UInt32, y Dynamic)); + +SELECT dynamicType(d), + d, + d.`Nested(x UInt32, y Dynamic)`.x, + d.`Nested(x UInt32, y Dynamic)`.y, + dynamicType(d.`Nested(x UInt32, y Dynamic)`.y[1]), + d.`Nested(x UInt32, y Dynamic)`.y.`String`, + d.`Nested(x UInt32, y Dynamic)`.y.`Tuple(Int64, Array(String))` +FROM t ORDER BY d +FORMAT PrettyCompactMonoBlock; diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference new file mode 100644 index 00000000000..33e3a15c7fb --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference @@ -0,0 +1,10 @@ +str_0 Dynamic(max_types=3) String +1 Dynamic(max_types=3) UInt64 +str_2 Dynamic(max_types=3) String +3 Dynamic(max_types=3) UInt64 +[1,2,3] Array(Int64) +2020-01-01 Date +str_1 String +str_2 String +42 UInt64 +43 UInt64 diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql new file mode 100644 index 00000000000..baba637eea4 --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql @@ -0,0 +1,8 @@ +SET allow_experimental_dynamic_type=1; +SELECT if(number % 2, number::Dynamic(max_types=3), ('str_' || toString(number))::Dynamic(max_types=2)) AS d, toTypeName(d), dynamicType(d) FROM numbers(4); +CREATE TABLE dynamic_test_1 (d Dynamic(max_types=3)) ENGINE = Memory; +INSERT INTO dynamic_test_1 VALUES ('str_1'), (42::UInt64); +CREATE TABLE dynamic_test_2 (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO dynamic_test_2 VALUES ('str_2'), (43::UInt64), ('2020-01-01'::Date), ([1, 2, 3]); +SELECT * FROM (SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2) order by d; + diff --git a/tests/queries/0_stateless/03164_analyzer_global_in_alias.reference b/tests/queries/0_stateless/03164_analyzer_global_in_alias.reference new file mode 100644 index 00000000000..459605fc1db --- /dev/null +++ b/tests/queries/0_stateless/03164_analyzer_global_in_alias.reference @@ -0,0 +1,4 @@ +1 1 +1 +1 1 +1 diff --git a/tests/queries/0_stateless/03164_analyzer_global_in_alias.sql b/tests/queries/0_stateless/03164_analyzer_global_in_alias.sql new file mode 100644 index 00000000000..00c293334ee --- /dev/null +++ b/tests/queries/0_stateless/03164_analyzer_global_in_alias.sql @@ -0,0 +1,6 @@ +SET allow_experimental_analyzer=1; +SELECT 1 GLOBAL IN (SELECT 1) AS s, s FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; +SELECT 1 GLOBAL IN (SELECT 
1) AS s FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; + +SELECT 1 GLOBAL IN (SELECT 1) AS s, s FROM remote('127.0.0.{1,3}', system.one) GROUP BY 1; +SELECT 1 GLOBAL IN (SELECT 1) AS s FROM remote('127.0.0.{1,3}', system.one) GROUP BY 1; diff --git a/tests/queries/0_stateless/03164_analyzer_rewrite_aggregate_function_with_if.reference b/tests/queries/0_stateless/03164_analyzer_rewrite_aggregate_function_with_if.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03164_analyzer_rewrite_aggregate_function_with_if.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03164_analyzer_rewrite_aggregate_function_with_if.sql b/tests/queries/0_stateless/03164_analyzer_rewrite_aggregate_function_with_if.sql new file mode 100644 index 00000000000..52f767d8aae --- /dev/null +++ b/tests/queries/0_stateless/03164_analyzer_rewrite_aggregate_function_with_if.sql @@ -0,0 +1 @@ +SELECT countIf(multiIf(number < 2, NULL, if(number = 4, 1, 0))) FROM numbers(5); diff --git a/tests/queries/0_stateless/03164_analyzer_validate_tree_size.reference b/tests/queries/0_stateless/03164_analyzer_validate_tree_size.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03164_analyzer_validate_tree_size.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03164_analyzer_validate_tree_size.sql b/tests/queries/0_stateless/03164_analyzer_validate_tree_size.sql new file mode 100644 index 00000000000..0e581592aef --- /dev/null +++ b/tests/queries/0_stateless/03164_analyzer_validate_tree_size.sql @@ -0,0 +1,1007 @@ +CREATE TABLE t +( +c1 Int64 , +c2 Int64 , +c3 Int64 , +c4 Int64 , +c5 Int64 , +c6 Int64 , +c7 Int64 , +c8 Int64 , +c9 Int64 , +c10 Int64 , +c11 Int64 , +c12 Int64 , +c13 Int64 , +c14 Int64 , +c15 Int64 , +c16 Int64 , +c17 Int64 , +c18 Int64 , +c19 Int64 , +c20 Int64 , +c21 Int64 , +c22 Int64 , +c23 Int64 , +c24 Int64 , +c25 Int64 , +c26 Int64 , +c27 Int64 , +c28 Int64 , +c29 Int64 , +c30 Int64 , +c31 Int64 , +c32 Int64 , +c33 Int64 , +c34 Int64 , +c35 Int64 , +c36 Int64 , +c37 Int64 , +c38 Int64 , +c39 Int64 , +c40 Int64 , +c41 Int64 , +c42 Int64 , +c43 Int64 , +c44 Int64 , +c45 Int64 , +c46 Int64 , +c47 Int64 , +c48 Int64 , +c49 Int64 , +c50 Int64 , +c51 Int64 , +c52 Int64 , +c53 Int64 , +c54 Int64 , +c55 Int64 , +c56 Int64 , +c57 Int64 , +c58 Int64 , +c59 Int64 , +c60 Int64 , +c61 Int64 , +c62 Int64 , +c63 Int64 , +c64 Int64 , +c65 Int64 , +c66 Int64 , +c67 Int64 , +c68 Int64 , +c69 Int64 , +c70 Int64 , +c71 Int64 , +c72 Int64 , +c73 Int64 , +c74 Int64 , +c75 Int64 , +c76 Int64 , +c77 Int64 , +c78 Int64 , +c79 Int64 , +c80 Int64 , +c81 Int64 , +c82 Int64 , +c83 Int64 , +c84 Int64 , +c85 Int64 , +c86 Int64 , +c87 Int64 , +c88 Int64 , +c89 Int64 , +c90 Int64 , +c91 Int64 , +c92 Int64 , +c93 Int64 , +c94 Int64 , +c95 Int64 , +c96 Int64 , +c97 Int64 , +c98 Int64 , +c99 Int64 , +c100 Int64 , +c101 Int64 , +c102 Int64 , +c103 Int64 , +c104 Int64 , +c105 Int64 , +c106 Int64 , +c107 Int64 , +c108 Int64 , +c109 Int64 , +c110 Int64 , +c111 Int64 , +c112 Int64 , +c113 Int64 , +c114 Int64 , +c115 Int64 , +c116 Int64 , +c117 Int64 , +c118 Int64 , +c119 Int64 , +c120 Int64 , +c121 Int64 , +c122 Int64 , +c123 Int64 , +c124 Int64 , +c125 Int64 , +c126 Int64 , +c127 Int64 , +c128 Int64 , +c129 Int64 , +c130 Int64 , +c131 Int64 , +c132 Int64 , +c133 Int64 , +c134 Int64 , +c135 Int64 , +c136 Int64 , +c137 Int64 , +c138 Int64 , +c139 Int64 , +c140 Int64 , +c141 Int64 , +c142 Int64 , +c143 Int64 , 
+c144 Int64 , +c145 Int64 , +c146 Int64 , +c147 Int64 , +c148 Int64 , +c149 Int64 , +c150 Int64 , +c151 Int64 , +c152 Int64 , +c153 Int64 , +c154 Int64 , +c155 Int64 , +c156 Int64 , +c157 Int64 , +c158 Int64 , +c159 Int64 , +c160 Int64 , +c161 Int64 , +c162 Int64 , +c163 Int64 , +c164 Int64 , +c165 Int64 , +c166 Int64 , +c167 Int64 , +c168 Int64 , +c169 Int64 , +c170 Int64 , +c171 Int64 , +c172 Int64 , +c173 Int64 , +c174 Int64 , +c175 Int64 , +c176 Int64 , +c177 Int64 , +c178 Int64 , +c179 Int64 , +c180 Int64 , +c181 Int64 , +c182 Int64 , +c183 Int64 , +c184 Int64 , +c185 Int64 , +c186 Int64 , +c187 Int64 , +c188 Int64 , +c189 Int64 , +c190 Int64 , +c191 Int64 , +c192 Int64 , +c193 Int64 , +c194 Int64 , +c195 Int64 , +c196 Int64 , +c197 Int64 , +c198 Int64 , +c199 Int64 , +c200 Int64 , +c201 Int64 , +c202 Int64 , +c203 Int64 , +c204 Int64 , +c205 Int64 , +c206 Int64 , +c207 Int64 , +c208 Int64 , +c209 Int64 , +c210 Int64 , +c211 Int64 , +c212 Int64 , +c213 Int64 , +c214 Int64 , +c215 Int64 , +c216 Int64 , +c217 Int64 , +c218 Int64 , +c219 Int64 , +c220 Int64 , +c221 Int64 , +c222 Int64 , +c223 Int64 , +c224 Int64 , +c225 Int64 , +c226 Int64 , +c227 Int64 , +c228 Int64 , +c229 Int64 , +c230 Int64 , +c231 Int64 , +c232 Int64 , +c233 Int64 , +c234 Int64 , +c235 Int64 , +c236 Int64 , +c237 Int64 , +c238 Int64 , +c239 Int64 , +c240 Int64 , +c241 Int64 , +c242 Int64 , +c243 Int64 , +c244 Int64 , +c245 Int64 , +c246 Int64 , +c247 Int64 , +c248 Int64 , +c249 Int64 , +c250 Int64 , +c251 Int64 , +c252 Int64 , +c253 Int64 , +c254 Int64 , +c255 Int64 , +c256 Int64 , +c257 Int64 , +c258 Int64 , +c259 Int64 , +c260 Int64 , +c261 Int64 , +c262 Int64 , +c263 Int64 , +c264 Int64 , +c265 Int64 , +c266 Int64 , +c267 Int64 , +c268 Int64 , +c269 Int64 , +c270 Int64 , +c271 Int64 , +c272 Int64 , +c273 Int64 , +c274 Int64 , +c275 Int64 , +c276 Int64 , +c277 Int64 , +c278 Int64 , +c279 Int64 , +c280 Int64 , +c281 Int64 , +c282 Int64 , +c283 Int64 , +c284 Int64 , +c285 Int64 , +c286 Int64 , +c287 Int64 , +c288 Int64 , +c289 Int64 , +c290 Int64 , +c291 Int64 , +c292 Int64 , +c293 Int64 , +c294 Int64 , +c295 Int64 , +c296 Int64 , +c297 Int64 , +c298 Int64 , +c299 Int64 , +c300 Int64 , +c301 Int64 , +c302 Int64 , +c303 Int64 , +c304 Int64 , +c305 Int64 , +c306 Int64 , +c307 Int64 , +c308 Int64 , +c309 Int64 , +c310 Int64 , +c311 Int64 , +c312 Int64 , +c313 Int64 , +c314 Int64 , +c315 Int64 , +c316 Int64 , +c317 Int64 , +c318 Int64 , +c319 Int64 , +c320 Int64 , +c321 Int64 , +c322 Int64 , +c323 Int64 , +c324 Int64 , +c325 Int64 , +c326 Int64 , +c327 Int64 , +c328 Int64 , +c329 Int64 , +c330 Int64 , +c331 Int64 , +c332 Int64 , +c333 Int64 , +c334 Int64 , +c335 Int64 , +c336 Int64 , +c337 Int64 , +c338 Int64 , +c339 Int64 , +c340 Int64 , +c341 Int64 , +c342 Int64 , +c343 Int64 , +c344 Int64 , +c345 Int64 , +c346 Int64 , +c347 Int64 , +c348 Int64 , +c349 Int64 , +c350 Int64 , +c351 Int64 , +c352 Int64 , +c353 Int64 , +c354 Int64 , +c355 Int64 , +c356 Int64 , +c357 Int64 , +c358 Int64 , +c359 Int64 , +c360 Int64 , +c361 Int64 , +c362 Int64 , +c363 Int64 , +c364 Int64 , +c365 Int64 , +c366 Int64 , +c367 Int64 , +c368 Int64 , +c369 Int64 , +c370 Int64 , +c371 Int64 , +c372 Int64 , +c373 Int64 , +c374 Int64 , +c375 Int64 , +c376 Int64 , +c377 Int64 , +c378 Int64 , +c379 Int64 , +c380 Int64 , +c381 Int64 , +c382 Int64 , +c383 Int64 , +c384 Int64 , +c385 Int64 , +c386 Int64 , +c387 Int64 , +c388 Int64 , +c389 Int64 , +c390 Int64 , +c391 Int64 , +c392 Int64 , +c393 Int64 , +c394 Int64 , +c395 Int64 , +c396 Int64 , +c397 Int64 
, +c398 Int64 , +c399 Int64 , +c400 Int64 , +c401 Int64 , +c402 Int64 , +c403 Int64 , +c404 Int64 , +c405 Int64 , +c406 Int64 , +c407 Int64 , +c408 Int64 , +c409 Int64 , +c410 Int64 , +c411 Int64 , +c412 Int64 , +c413 Int64 , +c414 Int64 , +c415 Int64 , +c416 Int64 , +c417 Int64 , +c418 Int64 , +c419 Int64 , +c420 Int64 , +c421 Int64 , +c422 Int64 , +c423 Int64 , +c424 Int64 , +c425 Int64 , +c426 Int64 , +c427 Int64 , +c428 Int64 , +c429 Int64 , +c430 Int64 , +c431 Int64 , +c432 Int64 , +c433 Int64 , +c434 Int64 , +c435 Int64 , +c436 Int64 , +c437 Int64 , +c438 Int64 , +c439 Int64 , +c440 Int64 , +c441 Int64 , +c442 Int64 , +c443 Int64 , +c444 Int64 , +c445 Int64 , +c446 Int64 , +c447 Int64 , +c448 Int64 , +c449 Int64 , +c450 Int64 , +c451 Int64 , +c452 Int64 , +c453 Int64 , +c454 Int64 , +c455 Int64 , +c456 Int64 , +c457 Int64 , +c458 Int64 , +c459 Int64 , +c460 Int64 , +c461 Int64 , +c462 Int64 , +c463 Int64 , +c464 Int64 , +c465 Int64 , +c466 Int64 , +c467 Int64 , +c468 Int64 , +c469 Int64 , +c470 Int64 , +c471 Int64 , +c472 Int64 , +c473 Int64 , +c474 Int64 , +c475 Int64 , +c476 Int64 , +c477 Int64 , +c478 Int64 , +c479 Int64 , +c480 Int64 , +c481 Int64 , +c482 Int64 , +c483 Int64 , +c484 Int64 , +c485 Int64 , +c486 Int64 , +c487 Int64 , +c488 Int64 , +c489 Int64 , +c490 Int64 , +c491 Int64 , +c492 Int64 , +c493 Int64 , +c494 Int64 , +c495 Int64 , +c496 Int64 , +c497 Int64 , +c498 Int64 , +c499 Int64 , +c500 Int64 , +b1 Int64 , +b2 Int64 , +b3 Int64 , +b4 Int64 , +b5 Int64 , +b6 Int64 , +b7 Int64 , +b8 Int64 , +b9 Int64 , +b10 Int64 , +b11 Int64 , +b12 Int64 , +b13 Int64 , +b14 Int64 , +b15 Int64 , +b16 Int64 , +b17 Int64 , +b18 Int64 , +b19 Int64 , +b20 Int64 , +b21 Int64 , +b22 Int64 , +b23 Int64 , +b24 Int64 , +b25 Int64 , +b26 Int64 , +b27 Int64 , +b28 Int64 , +b29 Int64 , +b30 Int64 , +b31 Int64 , +b32 Int64 , +b33 Int64 , +b34 Int64 , +b35 Int64 , +b36 Int64 , +b37 Int64 , +b38 Int64 , +b39 Int64 , +b40 Int64 , +b41 Int64 , +b42 Int64 , +b43 Int64 , +b44 Int64 , +b45 Int64 , +b46 Int64 , +b47 Int64 , +b48 Int64 , +b49 Int64 , +b50 Int64 , +b51 Int64 , +b52 Int64 , +b53 Int64 , +b54 Int64 , +b55 Int64 , +b56 Int64 , +b57 Int64 , +b58 Int64 , +b59 Int64 , +b60 Int64 , +b61 Int64 , +b62 Int64 , +b63 Int64 , +b64 Int64 , +b65 Int64 , +b66 Int64 , +b67 Int64 , +b68 Int64 , +b69 Int64 , +b70 Int64 , +b71 Int64 , +b72 Int64 , +b73 Int64 , +b74 Int64 , +b75 Int64 , +b76 Int64 , +b77 Int64 , +b78 Int64 , +b79 Int64 , +b80 Int64 , +b81 Int64 , +b82 Int64 , +b83 Int64 , +b84 Int64 , +b85 Int64 , +b86 Int64 , +b87 Int64 , +b88 Int64 , +b89 Int64 , +b90 Int64 , +b91 Int64 , +b92 Int64 , +b93 Int64 , +b94 Int64 , +b95 Int64 , +b96 Int64 , +b97 Int64 , +b98 Int64 , +b99 Int64 , +b100 Int64 , +b101 Int64 , +b102 Int64 , +b103 Int64 , +b104 Int64 , +b105 Int64 , +b106 Int64 , +b107 Int64 , +b108 Int64 , +b109 Int64 , +b110 Int64 , +b111 Int64 , +b112 Int64 , +b113 Int64 , +b114 Int64 , +b115 Int64 , +b116 Int64 , +b117 Int64 , +b118 Int64 , +b119 Int64 , +b120 Int64 , +b121 Int64 , +b122 Int64 , +b123 Int64 , +b124 Int64 , +b125 Int64 , +b126 Int64 , +b127 Int64 , +b128 Int64 , +b129 Int64 , +b130 Int64 , +b131 Int64 , +b132 Int64 , +b133 Int64 , +b134 Int64 , +b135 Int64 , +b136 Int64 , +b137 Int64 , +b138 Int64 , +b139 Int64 , +b140 Int64 , +b141 Int64 , +b142 Int64 , +b143 Int64 , +b144 Int64 , +b145 Int64 , +b146 Int64 , +b147 Int64 , +b148 Int64 , +b149 Int64 , +b150 Int64 , +b151 Int64 , +b152 Int64 , +b153 Int64 , +b154 Int64 , +b155 Int64 , +b156 Int64 , +b157 Int64 , +b158 Int64 , +b159 
Int64 , +b160 Int64 , +b161 Int64 , +b162 Int64 , +b163 Int64 , +b164 Int64 , +b165 Int64 , +b166 Int64 , +b167 Int64 , +b168 Int64 , +b169 Int64 , +b170 Int64 , +b171 Int64 , +b172 Int64 , +b173 Int64 , +b174 Int64 , +b175 Int64 , +b176 Int64 , +b177 Int64 , +b178 Int64 , +b179 Int64 , +b180 Int64 , +b181 Int64 , +b182 Int64 , +b183 Int64 , +b184 Int64 , +b185 Int64 , +b186 Int64 , +b187 Int64 , +b188 Int64 , +b189 Int64 , +b190 Int64 , +b191 Int64 , +b192 Int64 , +b193 Int64 , +b194 Int64 , +b195 Int64 , +b196 Int64 , +b197 Int64 , +b198 Int64 , +b199 Int64 , +b200 Int64 , +b201 Int64 , +b202 Int64 , +b203 Int64 , +b204 Int64 , +b205 Int64 , +b206 Int64 , +b207 Int64 , +b208 Int64 , +b209 Int64 , +b210 Int64 , +b211 Int64 , +b212 Int64 , +b213 Int64 , +b214 Int64 , +b215 Int64 , +b216 Int64 , +b217 Int64 , +b218 Int64 , +b219 Int64 , +b220 Int64 , +b221 Int64 , +b222 Int64 , +b223 Int64 , +b224 Int64 , +b225 Int64 , +b226 Int64 , +b227 Int64 , +b228 Int64 , +b229 Int64 , +b230 Int64 , +b231 Int64 , +b232 Int64 , +b233 Int64 , +b234 Int64 , +b235 Int64 , +b236 Int64 , +b237 Int64 , +b238 Int64 , +b239 Int64 , +b240 Int64 , +b241 Int64 , +b242 Int64 , +b243 Int64 , +b244 Int64 , +b245 Int64 , +b246 Int64 , +b247 Int64 , +b248 Int64 , +b249 Int64 , +b250 Int64 , +b251 Int64 , +b252 Int64 , +b253 Int64 , +b254 Int64 , +b255 Int64 , +b256 Int64 , +b257 Int64 , +b258 Int64 , +b259 Int64 , +b260 Int64 , +b261 Int64 , +b262 Int64 , +b263 Int64 , +b264 Int64 , +b265 Int64 , +b266 Int64 , +b267 Int64 , +b268 Int64 , +b269 Int64 , +b270 Int64 , +b271 Int64 , +b272 Int64 , +b273 Int64 , +b274 Int64 , +b275 Int64 , +b276 Int64 , +b277 Int64 , +b278 Int64 , +b279 Int64 , +b280 Int64 , +b281 Int64 , +b282 Int64 , +b283 Int64 , +b284 Int64 , +b285 Int64 , +b286 Int64 , +b287 Int64 , +b288 Int64 , +b289 Int64 , +b290 Int64 , +b291 Int64 , +b292 Int64 , +b293 Int64 , +b294 Int64 , +b295 Int64 , +b296 Int64 , +b297 Int64 , +b298 Int64 , +b299 Int64 , +b300 Int64 , +b301 Int64 , +b302 Int64 , +b303 Int64 , +b304 Int64 , +b305 Int64 , +b306 Int64 , +b307 Int64 , +b308 Int64 , +b309 Int64 , +b310 Int64 , +b311 Int64 , +b312 Int64 , +b313 Int64 , +b314 Int64 , +b315 Int64 , +b316 Int64 , +b317 Int64 , +b318 Int64 , +b319 Int64 , +b320 Int64 , +b321 Int64 , +b322 Int64 , +b323 Int64 , +b324 Int64 , +b325 Int64 , +b326 Int64 , +b327 Int64 , +b328 Int64 , +b329 Int64 , +b330 Int64 , +b331 Int64 , +b332 Int64 , +b333 Int64 , +b334 Int64 , +b335 Int64 , +b336 Int64 , +b337 Int64 , +b338 Int64 , +b339 Int64 , +b340 Int64 , +b341 Int64 , +b342 Int64 , +b343 Int64 , +b344 Int64 , +b345 Int64 , +b346 Int64 , +b347 Int64 , +b348 Int64 , +b349 Int64 , +b350 Int64 , +b351 Int64 , +b352 Int64 , +b353 Int64 , +b354 Int64 , +b355 Int64 , +b356 Int64 , +b357 Int64 , +b358 Int64 , +b359 Int64 , +b360 Int64 , +b361 Int64 , +b362 Int64 , +b363 Int64 , +b364 Int64 , +b365 Int64 , +b366 Int64 , +b367 Int64 , +b368 Int64 , +b369 Int64 , +b370 Int64 , +b371 Int64 , +b372 Int64 , +b373 Int64 , +b374 Int64 , +b375 Int64 , +b376 Int64 , +b377 Int64 , +b378 Int64 , +b379 Int64 , +b380 Int64 , +b381 Int64 , +b382 Int64 , +b383 Int64 , +b384 Int64 , +b385 Int64 , +b386 Int64 , +b387 Int64 , +b388 Int64 , +b389 Int64 , +b390 Int64 , +b391 Int64 , +b392 Int64 , +b393 Int64 , +b394 Int64 , +b395 Int64 , +b396 Int64 , +b397 Int64 , +b398 Int64 , +b399 Int64 , +b400 Int64 , +b401 Int64 , +b402 Int64 , +b403 Int64 , +b404 Int64 , +b405 Int64 , +b406 Int64 , +b407 Int64 , +b408 Int64 , +b409 Int64 , +b410 Int64 , +b411 Int64 , +b412 Int64 , 
+b413 Int64 , +b414 Int64 , +b415 Int64 , +b416 Int64 , +b417 Int64 , +b418 Int64 , +b419 Int64 , +b420 Int64 , +b421 Int64 , +b422 Int64 , +b423 Int64 , +b424 Int64 , +b425 Int64 , +b426 Int64 , +b427 Int64 , +b428 Int64 , +b429 Int64 , +b430 Int64 , +b431 Int64 , +b432 Int64 , +b433 Int64 , +b434 Int64 , +b435 Int64 , +b436 Int64 , +b437 Int64 , +b438 Int64 , +b439 Int64 , +b440 Int64 , +b441 Int64 , +b442 Int64 , +b443 Int64 , +b444 Int64 , +b445 Int64 , +b446 Int64 , +b447 Int64 , +b448 Int64 , +b449 Int64 , +b450 Int64 , +b451 Int64 , +b452 Int64 , +b453 Int64 , +b454 Int64 , +b455 Int64 , +b456 Int64 , +b457 Int64 , +b458 Int64 , +b459 Int64 , +b460 Int64 , +b461 Int64 , +b462 Int64 , +b463 Int64 , +b464 Int64 , +b465 Int64 , +b466 Int64 , +b467 Int64 , +b468 Int64 , +b469 Int64 , +b470 Int64 , +b471 Int64 , +b472 Int64 , +b473 Int64 , +b474 Int64 , +b475 Int64 , +b476 Int64 , +b477 Int64 , +b478 Int64 , +b479 Int64 , +b480 Int64 , +b481 Int64 , +b482 Int64 , +b483 Int64 , +b484 Int64 , +b485 Int64 , +b486 Int64 , +b487 Int64 , +b488 Int64 , +b489 Int64 , +b490 Int64 , +b491 Int64 , +b492 Int64 , +b493 Int64 , +b494 Int64 , +b495 Int64 , +b496 Int64 , +b497 Int64 , +b498 Int64 , +b499 Int64 , +b500 Int64 +) ENGINE = Memory; + +insert into t(c1) values(1); + +SELECT count() FROM (SELECT tuple(*) FROM t); diff --git a/tests/queries/0_stateless/03164_create_as_default.reference b/tests/queries/0_stateless/03164_create_as_default.reference new file mode 100644 index 00000000000..aceba23beaf --- /dev/null +++ b/tests/queries/0_stateless/03164_create_as_default.reference @@ -0,0 +1,5 @@ +CREATE TABLE default.src_table\n(\n `time` DateTime(\'UTC\') DEFAULT fromUnixTimestamp(sipTimestamp),\n `sipTimestamp` UInt64\n)\nENGINE = MergeTree\nORDER BY time\nSETTINGS index_granularity = 8192 +sipTimestamp +time fromUnixTimestamp(sipTimestamp) +{"time":"2024-05-20 09:00:00","sipTimestamp":"1716195600"} +{"time":"2024-05-20 09:00:00","sipTimestamp":"1716195600"} diff --git a/tests/queries/0_stateless/03164_create_as_default.sql b/tests/queries/0_stateless/03164_create_as_default.sql new file mode 100644 index 00000000000..e9fd7c1e35a --- /dev/null +++ b/tests/queries/0_stateless/03164_create_as_default.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS src_table; +DROP TABLE IF EXISTS copied_table; + +CREATE TABLE src_table +( + time DateTime('UTC') DEFAULT fromUnixTimestamp(sipTimestamp), + sipTimestamp UInt64 +) +ENGINE = MergeTree +ORDER BY time; + +INSERT INTO src_table(sipTimestamp) VALUES (toUnixTimestamp(toDateTime('2024-05-20 09:00:00', 'UTC'))); + +CREATE TABLE copied_table AS src_table; + +ALTER TABLE copied_table RENAME COLUMN `sipTimestamp` TO `timestamp`; + +SHOW CREATE TABLE src_table; + +SELECT name, default_expression FROM system.columns WHERE database = currentDatabase() AND table = 'src_table' ORDER BY name; +INSERT INTO src_table(sipTimestamp) VALUES (toUnixTimestamp(toDateTime('2024-05-20 09:00:00', 'UTC'))); + +SELECT * FROM src_table ORDER BY time FORMAT JSONEachRow; +SELECT * FROM copied_table ORDER BY time FORMAT JSONEachRow; + +DROP TABLE src_table; +DROP TABLE copied_table; diff --git a/tests/queries/0_stateless/03164_materialize_skip_index.reference b/tests/queries/0_stateless/03164_materialize_skip_index.reference new file mode 100644 index 00000000000..34251101e89 --- /dev/null +++ b/tests/queries/0_stateless/03164_materialize_skip_index.reference @@ -0,0 +1,52 @@ +20 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Expression + 
ReadFromMergeTree (default.t_skip_index_insert) + Indexes: + Skip + Name: idx_a + Description: minmax GRANULARITY 1 + Parts: 2/2 + Granules: 50/50 + Skip + Name: idx_b + Description: set GRANULARITY 1 + Parts: 2/2 + Granules: 50/50 +20 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Expression + ReadFromMergeTree (default.t_skip_index_insert) + Indexes: + Skip + Name: idx_a + Description: minmax GRANULARITY 1 + Parts: 1/1 + Granules: 6/50 + Skip + Name: idx_b + Description: set GRANULARITY 1 + Parts: 1/1 + Granules: 6/6 +20 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Expression + ReadFromMergeTree (default.t_skip_index_insert) + Indexes: + Skip + Name: idx_a + Description: minmax GRANULARITY 1 + Parts: 1/2 + Granules: 6/50 + Skip + Name: idx_b + Description: set GRANULARITY 1 + Parts: 1/1 + Granules: 6/6 +4 0 diff --git a/tests/queries/0_stateless/03164_materialize_skip_index.sql b/tests/queries/0_stateless/03164_materialize_skip_index.sql new file mode 100644 index 00000000000..4e59ef6b6cd --- /dev/null +++ b/tests/queries/0_stateless/03164_materialize_skip_index.sql @@ -0,0 +1,50 @@ +DROP TABLE IF EXISTS t_skip_index_insert; + +CREATE TABLE t_skip_index_insert +( + a UInt64, + b UInt64, + INDEX idx_a a TYPE minmax, + INDEX idx_b b TYPE set(3) +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 4; + +SET allow_experimental_analyzer = 1; +SET materialize_skip_indexes_on_insert = 0; + +SYSTEM STOP MERGES t_skip_index_insert; + +INSERT INTO t_skip_index_insert SELECT number, number / 50 FROM numbers(100); +INSERT INTO t_skip_index_insert SELECT number, number / 50 FROM numbers(100, 100); + +SELECT count() FROM t_skip_index_insert WHERE a >= 110 AND a < 130 AND b = 2; +EXPLAIN indexes = 1 SELECT count() FROM t_skip_index_insert WHERE a >= 110 AND a < 130 AND b = 2; + +SYSTEM START MERGES t_skip_index_insert; +OPTIMIZE TABLE t_skip_index_insert FINAL; + +SELECT count() FROM t_skip_index_insert WHERE a >= 110 AND a < 130 AND b = 2; +EXPLAIN indexes = 1 SELECT count() FROM t_skip_index_insert WHERE a >= 110 AND a < 130 AND b = 2; + +TRUNCATE TABLE t_skip_index_insert; + +INSERT INTO t_skip_index_insert SELECT number, number / 50 FROM numbers(100); +INSERT INTO t_skip_index_insert SELECT number, number / 50 FROM numbers(100, 100); + +SET mutations_sync = 2; + +ALTER TABLE t_skip_index_insert MATERIALIZE INDEX idx_a; +ALTER TABLE t_skip_index_insert MATERIALIZE INDEX idx_b; + +SELECT count() FROM t_skip_index_insert WHERE a >= 110 AND a < 130 AND b = 2; +EXPLAIN indexes = 1 SELECT count() FROM t_skip_index_insert WHERE a >= 110 AND a < 130 AND b = 2; + +DROP TABLE IF EXISTS t_skip_index_insert; + +SYSTEM FLUSH LOGS; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterSkipIndicesCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO t_skip_index_insert SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.reference b/tests/queries/0_stateless/03164_materialize_statistics.reference new file mode 100644 index 00000000000..c209d2e8b63 --- /dev/null +++ b/tests/queries/0_stateless/03164_materialize_statistics.reference @@ -0,0 +1,10 @@ +10 +10 +10 +statistic not used Condition less(b, 10_UInt8) moved to PREWHERE +statistic not used Condition less(a, 10_UInt8) moved to PREWHERE +statistic used after merge Condition less(a, 10_UInt8) moved to PREWHERE +statistic used after merge 
Condition less(b, 10_UInt8) moved to PREWHERE +statistic used after materialize Condition less(a, 10_UInt8) moved to PREWHERE +statistic used after materialize Condition less(b, 10_UInt8) moved to PREWHERE +2 0 diff --git a/tests/queries/0_stateless/03164_materialize_statistics.sql b/tests/queries/0_stateless/03164_materialize_statistics.sql new file mode 100644 index 00000000000..763644d16ab --- /dev/null +++ b/tests/queries/0_stateless/03164_materialize_statistics.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS t_statistic_materialize; + +SET allow_experimental_analyzer = 1; +SET allow_experimental_statistic = 1; +SET allow_statistic_optimize = 1; +SET materialize_statistics_on_insert = 0; + +CREATE TABLE t_statistic_materialize +( + a Int64 STATISTIC(tdigest), + b Int16 STATISTIC(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. + +INSERT INTO t_statistic_materialize SELECT number, -number FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM t_statistic_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistic not used'; + +OPTIMIZE TABLE t_statistic_materialize FINAL; + +SELECT count(*) FROM t_statistic_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistic used after merge'; + +TRUNCATE TABLE t_statistic_materialize; +SET mutations_sync = 2; + +INSERT INTO t_statistic_materialize SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE t_statistic_materialize MATERIALIZE STATISTIC a, b TYPE tdigest; + +SELECT count(*) FROM t_statistic_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistic used after materialize'; + +DROP TABLE t_statistic_materialize; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, message FROM system.text_log JOIN +( + SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log + WHERE current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM t_statistic_materialize%' + AND type = 'QueryFinish' +) AS query_log USING (query_id) +WHERE message LIKE '%moved to PREWHERE%' +ORDER BY event_time_microseconds; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO t_statistic_materialize SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03164_optimize_read_in_order_nullable.reference b/tests/queries/0_stateless/03164_optimize_read_in_order_nullable.reference new file mode 100644 index 00000000000..7e866b496a8 --- /dev/null +++ b/tests/queries/0_stateless/03164_optimize_read_in_order_nullable.reference @@ -0,0 +1,32 @@ +-- Reproducer result: +\N Mark 50 +1 John 33 +2 Ksenia 48 + +-- Read in order, no sort required: +0 0 +1 \N +4 4 +\N 2 +\N \N + +-- Read in order, partial sort for second key: +0 0 +1 \N +4 4 +\N \N +\N 2 + +-- No reading in order, sort for first key: +\N 2 +\N \N +0 0 +1 \N +4 4 + +-- Reverse order, partial sort for the second key: +\N 2 +\N \N +4 4 +1 \N +0 0 diff --git a/tests/queries/0_stateless/03164_optimize_read_in_order_nullable.sql b/tests/queries/0_stateless/03164_optimize_read_in_order_nullable.sql new file mode 100644 index 00000000000..7af6e55bf98 --- /dev/null +++ b/tests/queries/0_stateless/03164_optimize_read_in_order_nullable.sql @@ -0,0 +1,55 @@ +-- Reproducer from https://github.com/ClickHouse/ClickHouse/issues/63460 +DROP TABLE IF EXISTS 03164_users; +CREATE TABLE 
03164_users (uid Nullable(Int16), name String, age Int16) ENGINE=MergeTree ORDER BY (uid) SETTINGS allow_nullable_key=1; + +INSERT INTO 03164_users VALUES (1, 'John', 33); +INSERT INTO 03164_users VALUES (2, 'Ksenia', 48); +INSERT INTO 03164_users VALUES (NULL, 'Mark', 50); +OPTIMIZE TABLE 03164_users FINAL; + +SELECT '-- Reproducer result:'; + +SELECT * FROM 03164_users ORDER BY uid ASC NULLS FIRST LIMIT 10 SETTINGS optimize_read_in_order = 1; + +DROP TABLE IF EXISTS 03164_users; + +DROP TABLE IF EXISTS 03164_multi_key; +CREATE TABLE 03164_multi_key (c1 Nullable(UInt32), c2 Nullable(UInt32)) ENGINE = MergeTree ORDER BY (c1, c2) SETTINGS allow_nullable_key=1; + +INSERT INTO 03164_multi_key VALUES (0, 0), (1, NULL), (NULL, 2), (NULL, NULL), (4, 4); +-- Just in case +OPTIMIZE TABLE 03164_multi_key FINAL; + +SELECT ''; +SELECT '-- Read in order, no sort required:'; + +SELECT c1, c2 +FROM 03164_multi_key +ORDER BY c1 ASC NULLS LAST, c2 ASC NULLS LAST +SETTINGS optimize_read_in_order = 1; + +SELECT ''; +SELECT '-- Read in order, partial sort for second key:'; + +SELECT c1, c2 +FROM 03164_multi_key +ORDER BY c1 ASC NULLS LAST, c2 ASC NULLS FIRST +SETTINGS optimize_read_in_order = 1; + +SELECT ''; +SELECT '-- No reading in order, sort for first key:'; + +SELECT c1, c2 +FROM 03164_multi_key +ORDER BY c1 ASC NULLS FIRST, c2 ASC NULLS LAST +SETTINGS optimize_read_in_order = 1; + +SELECT ''; +SELECT '-- Reverse order, partial sort for the second key:'; + +SELECT c1, c2 +FROM 03164_multi_key +ORDER BY c1 DESC NULLS FIRST, c2 DESC NULLS LAST +SETTINGS optimize_read_in_order = 1; + +DROP TABLE IF EXISTS 03164_multi_key; diff --git a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.reference b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.reference new file mode 100644 index 00000000000..a2aef9837d3 --- /dev/null +++ b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.reference @@ -0,0 +1,3 @@ +655360 +18 0 +2 1 diff --git a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql new file mode 100644 index 00000000000..652b27b8a67 --- /dev/null +++ b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql @@ -0,0 +1,40 @@ +-- Tags: no-random-settings, no-fasttest + +SET allow_prefetched_read_pool_for_remote_filesystem=0; +SET allow_prefetched_read_pool_for_local_filesystem=0; +SET max_threads = 1; +SET remote_read_min_bytes_for_seek = 100000; +-- Will affect INSERT, but not merge +SET s3_check_objects_after_upload=1; + +DROP TABLE IF EXISTS t_compact_bytes_s3; +CREATE TABLE t_compact_bytes_s3(c1 UInt32, c2 UInt32, c3 UInt32, c4 UInt32, c5 UInt32) +ENGINE = MergeTree ORDER BY c1 +SETTINGS index_granularity = 512, min_bytes_for_wide_part = '10G', storage_policy = 's3_no_cache'; + +INSERT INTO t_compact_bytes_s3 SELECT number, number, number, number, number FROM numbers(512 * 32 * 40); + +SYSTEM DROP MARK CACHE; +OPTIMIZE TABLE t_compact_bytes_s3 FINAL; + +SYSTEM DROP MARK CACHE; +SELECT count() FROM t_compact_bytes_s3 WHERE NOT ignore(c2, c4); +SYSTEM FLUSH LOGS; + +SELECT + ProfileEvents['S3ReadRequestsCount'], + ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 +FROM system.query_log +WHERE event_date >= yesterday() AND type = 'QueryFinish' + AND current_database = currentDatabase() + AND query ilike '%INSERT INTO t_compact_bytes_s3 SELECT number, number, number%'; + +SELECT + ProfileEvents['S3ReadRequestsCount'], 
+ ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 +FROM system.query_log +WHERE event_date >= yesterday() AND type = 'QueryFinish' + AND current_database = currentDatabase() + AND query ilike '%OPTIMIZE TABLE t_compact_bytes_s3 FINAL%'; + +DROP TABLE IF EXISTS t_compact_bytes_s3; diff --git a/tests/queries/0_stateless/03165_distinct_with_window_func_crash.reference b/tests/queries/0_stateless/03165_distinct_with_window_func_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03165_distinct_with_window_func_crash.sql b/tests/queries/0_stateless/03165_distinct_with_window_func_crash.sql new file mode 100644 index 00000000000..e2e87fde35d --- /dev/null +++ b/tests/queries/0_stateless/03165_distinct_with_window_func_crash.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS atable; + +CREATE TABLE atable +( + cdu_date Int16, + loanx_id String, + rating_sp String +) +ENGINE = MergeTree +ORDER BY tuple(); + +-- disable parallelization after window function otherwise +-- generated pipeline contains enormous number of transformers (should be fixed separately) +SET query_plan_enable_multithreading_after_window_functions=0; +-- max_threads is randomized, and can significantly increase number of parallel transformers after window func, so set to small value explicitly +SET max_threads=3; + +SELECT DISTINCT + loanx_id, + rating_sp, + cdu_date, + row_number() OVER (PARTITION BY cdu_date) AS row_number, + last_value(cdu_date) OVER (PARTITION BY loanx_id ORDER BY cdu_date ASC) AS last_cdu_date +FROM atable +GROUP BY + cdu_date, + loanx_id, + rating_sp +SETTINGS query_plan_remove_redundant_distinct = 1; + +DROP TABLE atable; diff --git a/tests/queries/0_stateless/03165_parseReadableSize.reference b/tests/queries/0_stateless/03165_parseReadableSize.reference new file mode 100644 index 00000000000..57f17ecc5d3 --- /dev/null +++ b/tests/queries/0_stateless/03165_parseReadableSize.reference @@ -0,0 +1,60 @@ +1.00 B +1.00 KiB +1.00 MiB +1.00 GiB +1.00 TiB +1.00 PiB +1.00 EiB +1.00 B +1.00 KB +1.00 MB +1.00 GB +1.00 TB +1.00 PB +1.00 EB +1.00 MiB +1024 +3072 +1024 +1024 +1024 +1024 +1024 +\N +3217 +3217 +1000 +5 +2048 +8192 +0 0 0 +1 B 1 +1 KiB 1024 +1 MiB 1048576 +1 GiB 1073741824 +1 TiB 1099511627776 +1 PiB 1125899906842624 +1 EiB 1152921504606846976 +invalid \N +1 Joe \N +1KB 1000 + 1 GiB \N +1 TiB with fries \N +NaN KiB \N +Inf KiB \N +0xa123 KiB \N +1 B 1 +1 KiB 1024 +1 MiB 1048576 +1 GiB 1073741824 +1 TiB 1099511627776 +1 PiB 1125899906842624 +1 EiB 1152921504606846976 +invalid 0 +1 Joe 0 +1KB 1000 + 1 GiB 0 +1 TiB with fries 0 +NaN KiB 0 +Inf KiB 0 +0xa123 KiB 0 diff --git a/tests/queries/0_stateless/03165_parseReadableSize.sql b/tests/queries/0_stateless/03165_parseReadableSize.sql new file mode 100644 index 00000000000..33386268aa4 --- /dev/null +++ b/tests/queries/0_stateless/03165_parseReadableSize.sql @@ -0,0 +1,121 @@ +-- Should be the inverse of formatReadableSize +SELECT formatReadableSize(parseReadableSize('1 B')); +SELECT formatReadableSize(parseReadableSize('1 KiB')); +SELECT formatReadableSize(parseReadableSize('1 MiB')); +SELECT formatReadableSize(parseReadableSize('1 GiB')); +SELECT formatReadableSize(parseReadableSize('1 TiB')); +SELECT formatReadableSize(parseReadableSize('1 PiB')); +SELECT formatReadableSize(parseReadableSize('1 EiB')); + +-- Should be the inverse of formatReadableDecimalSize +SELECT formatReadableDecimalSize(parseReadableSize('1 B')); +SELECT formatReadableDecimalSize(parseReadableSize('1 
KB')); +SELECT formatReadableDecimalSize(parseReadableSize('1 MB')); +SELECT formatReadableDecimalSize(parseReadableSize('1 GB')); +SELECT formatReadableDecimalSize(parseReadableSize('1 TB')); +SELECT formatReadableDecimalSize(parseReadableSize('1 PB')); +SELECT formatReadableDecimalSize(parseReadableSize('1 EB')); + +-- Is case-insensitive +SELECT formatReadableSize(parseReadableSize('1 mIb')); + +-- Should be able to parse decimals +SELECT parseReadableSize('1.00 KiB'); -- 1024 +SELECT parseReadableSize('3.00 KiB'); -- 3072 + +-- Infix whitespace is ignored +SELECT parseReadableSize('1 KiB'); +SELECT parseReadableSize('1KiB'); + +-- Can parse LowCardinality +SELECT parseReadableSize(toLowCardinality('1 KiB')); + +-- Can parse nullable fields +SELECT parseReadableSize(toNullable('1 KiB')); + +-- Can parse non-const columns fields +SELECT parseReadableSize(materialize('1 KiB')); + +-- Output is NULL if NULL arg is passed +SELECT parseReadableSize(NULL); + +-- Can parse more decimal places than Float64's precision +SELECT parseReadableSize('3.14159265358979323846264338327950288419716939937510 KiB'); + +-- Can parse sizes prefixed with a plus sign +SELECT parseReadableSize('+3.1415 KiB'); + +-- Can parse amounts in scientific notation +SELECT parseReadableSize('10e2 B'); + +-- Can parse floats with no decimal points +SELECT parseReadableSize('5. B'); + +-- Can parse numbers with leading zeroes +SELECT parseReadableSize('002 KiB'); + +-- Can parse octal-like +SELECT parseReadableSize('08 KiB'); + +-- Can parse various flavours of zero +SELECT parseReadableSize('0 KiB'), parseReadableSize('+0 KiB'), parseReadableSize('-0 KiB'); + +-- ERRORS +-- No arguments +SELECT parseReadableSize(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- Too many arguments +SELECT parseReadableSize('1 B', '2 B'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- Wrong Type +SELECT parseReadableSize(12); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- Invalid input - overall garbage +SELECT parseReadableSize('oh no'); -- { serverError CANNOT_PARSE_NUMBER } +-- Invalid input - unknown unit +SELECT parseReadableSize('12.3 rb'); -- { serverError CANNOT_PARSE_TEXT } +-- Invalid input - Leading whitespace +SELECT parseReadableSize(' 1 B'); -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } +-- Invalid input - Trailing characters +SELECT parseReadableSize('1 B leftovers'); -- { serverError UNEXPECTED_DATA_AFTER_PARSED_VALUE } +-- Invalid input - Negative sizes are not allowed +SELECT parseReadableSize('-1 KiB'); -- { serverError BAD_ARGUMENTS } +-- Invalid input - Input too large to fit in UInt64 +SELECT parseReadableSize('1000 EiB'); -- { serverError BAD_ARGUMENTS } +-- Invalid input - Hexadecimal is not supported +SELECT parseReadableSize('0xa123 KiB'); -- { serverError CANNOT_PARSE_TEXT } +-- Invalid input - NaN is not supported, with or without sign and with different capitalizations +SELECT parseReadableSize('nan KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('+nan KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('-nan KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('NaN KiB'); -- { serverError BAD_ARGUMENTS } +-- Invalid input - Infinite is not supported, with or without sign, in all its forms +SELECT parseReadableSize('inf KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('+inf KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('-inf KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('infinite 
KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('+infinite KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('-infinite KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('Inf KiB'); -- { serverError BAD_ARGUMENTS } +SELECT parseReadableSize('Infinite KiB'); -- { serverError BAD_ARGUMENTS } + + +-- OR NULL +-- Works as the regular version when inputs are correct +SELECT + arrayJoin(['1 B', '1 KiB', '1 MiB', '1 GiB', '1 TiB', '1 PiB', '1 EiB']) AS readable_sizes, + parseReadableSizeOrNull(readable_sizes) AS filesize; + +-- Returns NULL on invalid values +SELECT + arrayJoin(['invalid', '1 Joe', '1KB', ' 1 GiB', '1 TiB with fries', 'NaN KiB', 'Inf KiB', '0xa123 KiB']) AS readable_sizes, + parseReadableSizeOrNull(readable_sizes) AS filesize; + + +-- OR ZERO +-- Works as the regular version when inputs are correct +SELECT + arrayJoin(['1 B', '1 KiB', '1 MiB', '1 GiB', '1 TiB', '1 PiB', '1 EiB']) AS readable_sizes, + parseReadableSizeOrZero(readable_sizes) AS filesize; + +-- Returns zero on invalid values +SELECT + arrayJoin(['invalid', '1 Joe', '1KB', ' 1 GiB', '1 TiB with fries', 'NaN KiB', 'Inf KiB', '0xa123 KiB']) AS readable_sizes, + parseReadableSizeOrZero(readable_sizes) AS filesize; \ No newline at end of file diff --git a/tests/queries/0_stateless/03166_optimize_row_order_during_insert.reference b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.reference new file mode 100644 index 00000000000..bbd87fb450c --- /dev/null +++ b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.reference @@ -0,0 +1,78 @@ +Simple test +Egor 1 +Egor 2 +Igor 1 +Igor 2 +Igor 3 +Cardinalities test +Alex 1 63 0 +Alex 1 65 0 +Alex 1 239 0 +Alex 2 224 0 +Alex 4 83 0 +Alex 4 134 0 +Alex 4 192 0 +Bob 2 53 0 +Bob 4 100 0 +Bob 4 177 0 +Bob 4 177 0 +Nikita 1 173 0 +Nikita 1 228 0 +Nikita 2 148 0 +Nikita 2 148 0 +Nikita 2 208 0 +Alex 1 63 1 +Alex 1 65 1 +Alex 1 239 1 +Alex 2 128 1 +Alex 2 128 1 +Alex 2 224 1 +Alex 4 83 1 +Alex 4 83 1 +Alex 4 134 1 +Alex 4 134 1 +Alex 4 192 1 +Bob 2 53 1 +Bob 2 53 1 +Bob 2 187 1 +Bob 2 187 1 +Bob 4 100 1 +Nikita 1 173 1 +Nikita 1 228 1 +Nikita 2 54 1 +Nikita 2 54 1 +Nikita 2 148 1 +Nikita 2 208 1 +Equivalence classes test +AB 1 9.81 0 +A\0 0 2.7 1 +A\0 1 2.7 1 +B\0 0 2.7 1 +B\0 1 2.7 1 +A\0 1 42 1 +B\0 0 42 1 +A\0 0 3.14 \N +B\0 -1 3.14 \N +B\0 2 3.14 \N +AB 0 42 \N +AB 0 42 \N +B\0 0 42 \N +A\0 1 42 \N +A\0 1 42 \N +B\0 1 42 \N +Many types test +A\0\0\0\0\0 2020-01-01 [0,1.1] 10 some string {'key':'value'} (123) +A\0\0\0\0\0 2020-01-01 [0,1.1] \N example {} (26) +A\0\0\0\0\0 2020-01-01 [2.2,1.1] 1 some other string {'key2':'value2'} (5) +A\0\0\0\0\0 2020-01-02 [2.2,1.1] 1 some other string {'key2':'value2'} (5) +A\0\0\0\0\0 2020-01-02 [0,1.1] 10 some string {'key':'value'} (123) +A\0\0\0\0\0 2020-01-02 [0,2.2] 10 example {} (26) +B\0\0\0\0\0 2020-01-04 [0,2.2] \N example {} (26) +B\0\0\0\0\0 2020-01-04 [0,1.1] 10 some string {'key':'value'} (123) +B\0\0\0\0\0 2020-01-04 [2.2,1.1] 1 some string {'key2':'value2'} (5) +B\0\0\0\0\0 2020-01-05 [0,1.1] 10 some string {'key':'value'} (123) +B\0\0\0\0\0 2020-01-05 [0,2.2] \N example {} (26) +B\0\0\0\0\0 2020-01-05 [2.2,1.1] 1 some other string {'key':'value'} (5) +C\0\0\0\0\0 2020-01-04 [0,1.1] 10 some string {'key':'value'} (5) +C\0\0\0\0\0 2020-01-04 [0,2.2] \N example {} (26) +C\0\0\0\0\0 2020-01-04 [2.2,1.1] 1 some other string {'key2':'value2'} (5) diff --git a/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql
b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql new file mode 100644 index 00000000000..bb2f5e94d05 --- /dev/null +++ b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql @@ -0,0 +1,98 @@ +-- Checks that no bad things happen when the table optimizes the row order to improve compressibility during insert. + + +-- Below SELECTs intentionally only ORDER BY the table primary key and rely on read-in-order optimization +SET optimize_read_in_order = 1; + +-- Just a simple check that the optimization works correctly for a table with 2 columns and 2 equivalence classes. +SELECT 'Simple test'; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + name String, + event Int8 +) ENGINE = MergeTree +ORDER BY name +SETTINGS allow_experimental_optimized_row_order = true; +INSERT INTO tab VALUES ('Igor', 3), ('Egor', 1), ('Egor', 2), ('Igor', 2), ('Igor', 1); + +SELECT * FROM tab ORDER BY name SETTINGS max_threads=1; + +DROP TABLE tab; + +-- Checks that RowOptimizer correctly selects the order for columns according to cardinality, with an empty ORDER BY. +-- There are 4 columns with cardinalities {name : 3, timestamp : 3, money: 17, flag: 2}, so the columns order must be {flag, name, timestamp, money}. +SELECT 'Cardinalities test'; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + name String, + timestamp Int64, + money UInt8, + flag String +) ENGINE = MergeTree +ORDER BY () +SETTINGS allow_experimental_optimized_row_order = True; +INSERT INTO tab VALUES ('Bob', 4, 100, '1'), ('Nikita', 2, 54, '1'), ('Nikita', 1, 228, '1'), ('Alex', 4, 83, '1'), ('Alex', 4, 134, '1'), ('Alex', 1, 65, '0'), ('Alex', 4, 134, '1'), ('Bob', 2, 53, '0'), ('Alex', 4, 83, '0'), ('Alex', 1, 63, '1'), ('Bob', 2, 53, '1'), ('Alex', 4, 192, '1'), ('Alex', 2, 128, '1'), ('Nikita', 2, 148, '0'), ('Bob', 4, 177, '0'), ('Nikita', 1, 173, '0'), ('Alex', 1, 239, '0'), ('Alex', 1, 63, '0'), ('Alex', 2, 224, '1'), ('Bob', 4, 177, '0'), ('Alex', 2, 128, '1'), ('Alex', 4, 134, '0'), ('Alex', 4, 83, '1'), ('Bob', 4, 100, '0'), ('Nikita', 2, 54, '1'), ('Alex', 1, 239, '1'), ('Bob', 2, 187, '1'), ('Alex', 1, 65, '1'), ('Bob', 2, 53, '1'), ('Alex', 2, 224, '0'), ('Alex', 4, 192, '0'), ('Nikita', 1, 173, '1'), ('Nikita', 2, 148, '1'), ('Bob', 2, 187, '1'), ('Nikita', 2, 208, '1'), ('Nikita', 2, 208, '0'), ('Nikita', 1, 228, '0'), ('Nikita', 2, 148, '0'); + +SELECT * FROM tab SETTINGS max_threads=1; + +DROP TABLE tab; + +-- Checks that RowOptimizer correctly selects the order for columns according to cardinality in each equivalence class obtained using SortDescription. +-- There are two columns in the SortDescription: {flag, money} in this order. +-- So there are 5 equivalence classes: {9.81, 0}, {2.7, 1}, {42, 1}, {3.14, Null}, {42, Null}. +-- For the first three of them cardinalities of the other 2 columns are equal, so they are sorted in order {0, 1} in these classes. +-- In the fourth class cardinalities: {name : 2, timestamp : 3}, so they are sorted in order {name, timestamp} in this class. +-- In the fifth class cardinalities: {name : 3, timestamp : 2}, so they are sorted in order {timestamp, name} in this class.
+SELECT 'Equivalence classes test'; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + name FixedString(2), + timestamp Float32, + money Float64, + flag Nullable(Int32) +) ENGINE = MergeTree +ORDER BY (flag, money) +SETTINGS allow_experimental_optimized_row_order = True, allow_nullable_key = True; +INSERT INTO tab VALUES ('AB', 0, 42, Null), ('AB', 0, 42, Null), ('A', 1, 42, Null), ('AB', 1, 9.81, 0), ('B', 0, 42, Null), ('B', -1, 3.14, Null), ('B', 1, 2.7, 1), ('B', 0, 42, 1), ('A', 1, 42, 1), ('B', 1, 42, Null), ('B', 0, 2.7, 1), ('A', 0, 2.7, 1), ('B', 2, 3.14, Null), ('A', 0, 3.14, Null), ('A', 1, 2.7, 1), ('A', 1, 42, Null); + +SELECT * FROM tab ORDER BY (flag, money) SETTINGS max_threads=1; + +DROP TABLE tab; + +-- Checks that no bad things happen when the table optimizes the row order to improve compressibility during insert for many different column types. +-- For some of these types estimateCardinalityInPermutedRange returns just the size of the current equal range. +-- There are 5 equivalence classes, each of them has size = 3. +-- In the first of them the cardinality of the vector_array column equals 2, the other cardinalities equal 3. +-- In the second of them the cardinality of the nullable_int column equals 2, the other cardinalities equal 3. +-- ... +-- In the fifth of them the cardinality of the tuple_column column equals 2, the other cardinalities equal 3. +-- So, for each of these classes, the column with cardinality 2 for which the estimateCardinalityInPermutedRange method is implemented +-- must be the first in the column order, and all others must stay in the stable order. +-- For all other classes the columns must stay in the stable order. +SELECT 'Many types test'; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + fixed_str FixedString(6), + event_date Date, + vector_array Array(Float32), + nullable_int Nullable(Int128), + low_card_string LowCardinality(String), + map_column Map(String, String), + tuple_column Tuple(UInt256) +) ENGINE = MergeTree() +ORDER BY (fixed_str, event_date) +SETTINGS allow_experimental_optimized_row_order = True; + +INSERT INTO tab VALUES ('A', '2020-01-01', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('A', '2020-01-01', [0.0, 1.1], NULL, 'example', {}, (26)), ('A', '2020-01-01', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)), ('A', '2020-01-02', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('A', '2020-01-02', [0.0, 2.2], 10, 'example', {}, (26)), ('A', '2020-01-02', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)), ('B', '2020-01-04', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('B', '2020-01-04', [0.0, 2.2], Null, 'example', {}, (26)), ('B', '2020-01-04', [2.2, 1.1], 1, 'some string', {'key2':'value2'}, (5)), ('B', '2020-01-05', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('B', '2020-01-05', [0.0, 2.2], Null, 'example', {}, (26)), ('B', '2020-01-05', [2.2, 1.1], 1, 'some other string', {'key':'value'}, (5)), ('C', '2020-01-04', [0.0, 1.1], 10, 'some string', {'key':'value'}, (5)), ('C', '2020-01-04', [0.0, 2.2], Null, 'example', {}, (26)), ('C', '2020-01-04', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)); + +SELECT * FROM tab ORDER BY (fixed_str, event_date) SETTINGS max_threads=1; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/data_bson/comments.bson b/tests/queries/0_stateless/data_bson/comments.bson index 9aa4b6e6562..06681c51976 100644 Binary files a/tests/queries/0_stateless/data_bson/comments.bson and
b/tests/queries/0_stateless/data_bson/comments.bson differ diff --git a/tests/queries/0_stateless/data_bson/comments_new.bson b/tests/queries/0_stateless/data_bson/comments_new.bson new file mode 100644 index 00000000000..aa9ee9bdbb4 Binary files /dev/null and b/tests/queries/0_stateless/data_bson/comments_new.bson differ diff --git a/tests/queries/0_stateless/data_minio/03036_archive1.tar b/tests/queries/0_stateless/data_minio/03036_archive1.tar new file mode 100644 index 00000000000..55b3ddc745a Binary files /dev/null and b/tests/queries/0_stateless/data_minio/03036_archive1.tar differ diff --git a/tests/queries/0_stateless/data_minio/03036_archive1.zip b/tests/queries/0_stateless/data_minio/03036_archive1.zip new file mode 100644 index 00000000000..a76bca30711 Binary files /dev/null and b/tests/queries/0_stateless/data_minio/03036_archive1.zip differ diff --git a/tests/queries/0_stateless/data_minio/03036_archive2.tar b/tests/queries/0_stateless/data_minio/03036_archive2.tar new file mode 100644 index 00000000000..4cc3f6830a5 Binary files /dev/null and b/tests/queries/0_stateless/data_minio/03036_archive2.tar differ diff --git a/tests/queries/0_stateless/data_minio/03036_archive2.zip b/tests/queries/0_stateless/data_minio/03036_archive2.zip new file mode 100644 index 00000000000..8b49dc8d9f4 Binary files /dev/null and b/tests/queries/0_stateless/data_minio/03036_archive2.zip differ diff --git a/tests/queries/0_stateless/data_minio/03036_archive3.tar.gz b/tests/queries/0_stateless/data_minio/03036_archive3.tar.gz new file mode 100644 index 00000000000..88871764071 Binary files /dev/null and b/tests/queries/0_stateless/data_minio/03036_archive3.tar.gz differ diff --git a/tests/queries/0_stateless/data_minio/03036_compressed_file_archive.zip b/tests/queries/0_stateless/data_minio/03036_compressed_file_archive.zip new file mode 100644 index 00000000000..619f81327a8 Binary files /dev/null and b/tests/queries/0_stateless/data_minio/03036_compressed_file_archive.zip differ diff --git a/tests/queries/0_stateless/data_minio/03036_json_archive.zip b/tests/queries/0_stateless/data_minio/03036_json_archive.zip new file mode 100644 index 00000000000..31aa2c168b2 Binary files /dev/null and b/tests/queries/0_stateless/data_minio/03036_json_archive.zip differ diff --git a/tests/queries/0_stateless/data_parquet/native_parquet_reader.parquet b/tests/queries/0_stateless/data_parquet/native_parquet_reader.parquet new file mode 100644 index 00000000000..c0d222342e3 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/native_parquet_reader.parquet differ diff --git a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql index cbfbbaa2662..cd88743160c 100644 --- a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql +++ b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql @@ -14,6 +14,6 @@ WITH toTimeZone(EventTime, 'Asia/Dubai') AS xyz SELECT uniq(*) FROM test.hits WH SET optimize_move_to_prewhere = 0; SET enable_multiple_prewhere_read_steps = 0; -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } -SELECT uniq(URL) FROM test.hits PREWHERE toTimeZone(EventTime, 'Asia/Dubai') >= 
'2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError TOO_MANY_BYTES } +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError TOO_MANY_BYTES } +SELECT uniq(URL) FROM test.hits PREWHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError TOO_MANY_BYTES } diff --git a/tests/queries/1_stateful/00144_functions_of_aggregation_states.sql b/tests/queries/1_stateful/00144_functions_of_aggregation_states.sql index c5cd45d68b3..e30c132d242 100644 --- a/tests/queries/1_stateful/00144_functions_of_aggregation_states.sql +++ b/tests/queries/1_stateful/00144_functions_of_aggregation_states.sql @@ -1,3 +1,3 @@ -SET allow_deprecated_functions = 1; +SET allow_deprecated_error_prone_window_functions = 1; SELECT EventDate, finalizeAggregation(state), runningAccumulate(state) FROM (SELECT EventDate, uniqState(UserID) AS state FROM test.hits GROUP BY EventDate ORDER BY EventDate); diff --git a/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql b/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql index fe7837d7ff1..63eca96414f 100644 --- a/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql +++ b/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql @@ -1,20 +1,20 @@ -- the work for scalar subquery is properly accounted: SET max_rows_to_read = 1000000; -SELECT 1 = (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError 158 } +SELECT 1 = (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError TOO_MANY_ROWS } -- the work for subquery in IN is properly accounted: SET max_rows_to_read = 1000000; -SELECT 1 IN (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError 158 } +SELECT 1 IN (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError TOO_MANY_ROWS } -- this query reads from the table twice: SET max_rows_to_read = 15000000; -SELECT count() IN (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)) FROM test.hits WHERE NOT ignore(AdvEngineID); -- { serverError 158 } +SELECT count() IN (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)) FROM test.hits WHERE NOT ignore(AdvEngineID); -- { serverError TOO_MANY_ROWS } -- the resources are properly accounted even if the subquery is evaluated in advance to facilitate the index analysis. -- this query is using index and filter out the second reading pass. SET max_rows_to_read = 1000000; -SELECT count() FROM test.hits WHERE CounterID > (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError 158 } +SELECT count() FROM test.hits WHERE CounterID > (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError TOO_MANY_ROWS } -- this query is using index but have to read all the data twice. 
SET max_rows_to_read = 10000000; -SELECT count() FROM test.hits WHERE CounterID < (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError 158 } +SELECT count() FROM test.hits WHERE CounterID < (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError TOO_MANY_ROWS } diff --git a/tests/queries/1_stateful/00181_cross_join_compression.reference b/tests/queries/1_stateful/00181_cross_join_compression.reference new file mode 100644 index 00000000000..ec79cc40fe7 --- /dev/null +++ b/tests/queries/1_stateful/00181_cross_join_compression.reference @@ -0,0 +1,1000 @@ +33553863 http://ultival.1septs +33553862 http://full%2F4a%2Fartiry/prival Pandar &clid=4832.html?html&lang=ru&lr=2&msid=203&banerid +33553862 http://full%2F4a%2Fartiry/prival Pandar &clid=4832.html?html&lang=ru&lr=2&msid=203&banerid +33553742 https://myz.sutoclub +33553742 https://myz.sutocation{fontaktering=0&session.ru/index.ru/apltras[5]=0&engine_power/user-efender +33553742 https://myz.sutocation/search?text=коллей и солнце в +33553161 http://life-pri-burnoe-dlya-and-na-chudopoly +33553161 http://life-pri-burnoe-dlya-and-na-chudopoly +33553118 http://news%2F30%2F1.2&name":"30.475065&text=в контакте +33552544 http://m.yandsearch?text=юнис +33552455 http://maps.yandex.ru/fashionnya_10000940877/?action-lacost_mob_contakte,facenews/864490.html?accountryasam-full +33552449 http://top.rbc.ua/погода-харьков&img_url=http://xyyatsya.html&lang=ru&lr=2779077&text=арабатывающих +33552449 http://top.rbc.ru/yandsearch?text=вання +33552449 http://top.rbc.ru/yandsearch?text=ванные целевизора&lr=213&msid=2083696&text=как +33552449 http://top.rbc.ru/yandsearch?clid=90 +33552449 http://top.rbc.ru/onliner.play.php?s=8 +33552449 http://top.rbc.ru/online.ru/disk.ru/video/embed +33552449 http://top.rbc.ru/online.com.tr/guncel/frl-2/233 +33552449 http://top.rbc.ru/myfics.pulse.ru/yandsearch?lr=48&perialu.net +33552449 http://top.rbc.ru/myfics.pulscence/2014/91269707723/item/6961%26nid +33552328 http://kupit-i-lynn-na-vtorders/dokto.ru/touchshie_na_skladnaia-vlozhenshchadki +33552328 http://kupit-i-lynn-na-tastyagazeta.ru/tverdar.org/bin.ru/viewtopics/174287200029444776ffa1ba91d43030e +33552216 http://video/58785/?promo=10347&text=фильмы она для +33552216 http://video/58785/?promo=10347&text=фильмы она для +33552216 http://finam.intel +33552216 http://finam.inpearls.html&lang=ru&lr=959&text=вконтаж +33552099 http://rg.ru/remont_give-schet/shop.novosti/2014/9116-spalnyj-so +33551977 http://search +33551857 http://yandex.ua/uk/news=articles/485/mode=action=FR,B9 +33551857 http://yandex.ua/uk/news=articles/485/mode=action=FR,B9 +33551857 http://yandex.ua/uk/news=articles/485/mode=action=FR,B9 +33551857 http://yandex.ua/uk/news=articles/485/mode=action=FR,B9 +33551857 http://yandex.ru/public-tools/sanatify=nogeo=0&secret=52436 +33551857 http://yandex.ru/public-tools/sanatify=nogeo=0&secret=52436 +33551857 http://yandex.ru/public-tools/sanatify=nogeo=0&secret=52436 +33551857 http://yandex.ru/2402838/?content=on&_orders_num=4907.html#videos/doxodover/fotostransfer +33551274 http://mysweet-tavr.ru/yandex.ru/yandsearch?clid=13259827&spn +33551274 http://mysweet-tam-3-silya +33551274 http://mysweet-tam-3-silya +33550899 http://astroisshke +33550844 http://yandex.ru/cgi-bin/msglist/meshnyi-mezhdunarod +33550844 http://yandex.ru/?clid=20677839/pro-vospalnii-ot-mashnie_novosti/zakon +33550801 http://sibdomashnyj-cvet-telej-modanija +33550801 http://sibdom.net/ru/pozdrav.narutoprague/auto/geneva2014 +33550801 
http://sibdom.net/ru/pozdrav.narutoprague/auto/geneva2014 +33550801 http://sibdom.com/top22013/summi-dlya-vsem-yumor_prezidents +33550801 http://sibdom.com/search +33550801 http://sibdom.com/search +33550801 http://sibdom.com/search +33550801 http://sibdom.com/search +33550801 http://sibdom.com/search +33550801 http://sibdom.com/search +33550801 http://sibdom.com/magayutsya-posle-yontent.ru +33550801 http://sibdom.29.ru/articles/ya-ukray/doktor +33550204 http://studio/dist_pink-dis +33549664 http://gde_timeout=144037&lr=50&from=direcommepage=0&ad_info=ElsdCQRfSVFsBgBTAw +33549406 http://yandex.ua/user_app_versianfood.com/search?cl4url=1primea-materisi-puloveplane +33549201 http://fashing.net/testdrive_key=506d9e3dfbd268e6b6630e58&doma +33549131 http://images/216377821524.13951616 +33549116 https://m.haberler.ru/braun/9408215.shtml&lang=ru +33549116 https://m.haberler.ru/braun/9408215.shtml&lang=ru +33549116 https://m.haberler.ru/Kovalidad.fisha.lebek tanka.com.ua/news +33549091 http://zagrams=bid%3D84841.shtml/ru/store/xml_catalog/view/2014/3 +33549091 http://zagradskaja/2011.18432133/11/maps.yandex.php?promo +33548853 http://predir%3Fid%3D0%26height%3Daf1ea8a2981 +33548697 http://yandex.ru/jobs/florange_key=&availa-vtornye +33548648 http://worldoftanks.ru/load.cgi%3Fsid +33548648 http://worldoftanks.ru/140-seasonvar +33548648 http://worldoftanks.aspx&refererleniya-bilgisa-roslanding_url +33548648 http://worldoftanks.aspx&referereezume/vorona.org/news.yandsearch +33548648 http://worldoftanks.aspx&referereezultator/anne_postami.ru/3628 +33548648 http://worldoftanks.aspx&referereezultator/anne_postami.ru/3628 +33548648 http://worldoftanks.aspx&referereezhischet_dorovideo/search?lr +33548648 http://worldoftanks.aspx&referereezhimost?q=грузкаопмо +33548648 http://worldoftanks.aspx&referereezhimost?q=грузкаопмо +33548648 http://worldoftanks.aspx&referereezhimost?p=5758415845392595002181 +33548648 http://worldoftanks.aspx&referereezhdunarodov.ru/igri7.ru/cher +33548648 http://worldoftanks.aspx&referereezhdugor.com/webhp?espv=1&ie=UTF +33548648 http://worldoftanks.aspx&referereezhda-devusher.html&langualeo +33548648 http://worldoftanks.aspx&referereezhda-devusher.html&langualeo +33548648 http://worldoftanks.aspx&referereezh&api_resuet +33548648 http://worldoftanks.aspx&referency=UAH&job_interzhalsya +33548361 http://wot/htm/wotlauncheskim_tatavni.ru%2F&is_mobile +33547846 http://yenisafak.com.ua/погода на можно ли скачать контакте&lr=194 +33547361 http://acunn.mk.ru +33546008 http://yandex.ru/syndyk.ru/yandex.ru/view&idtv=2182 +33545847 http://f5haber7.com/play.ru/2012/100135364&secret_besplate.net/gruzovik +33545847 http://f5haber7.com/play.ru/2012/100135364&secret_besplate.net/gruzovik +33545847 http://f5haber7.com/play.ru/2012/100135364&secret_besplate.net/gruzovik +33545847 http://f5haber7.com/play.ru/2012/100135364&secret_besplate.net/gruzovik +33545847 http://f5haber7.com/play.ru/2012/100135364&secret_besplate.net/gruzovik +33545847 http://f5haber7.com/play.ru/2012/100135364&secret_besplate.net/gruzovik +33545847 http://f5haber7.com/play.php?id=1989605&lr=213&msid=2749/?page +33545847 http://f5haber7.com/kayintov +33545847 http://f5haber7.com/kayintov +33545847 http://f5haber7.com/kayintov +33545847 http://f5haber7.com/?cid=19842783179368 +33545847 http://f5haber7.com&publications/mysel-omeganovsk/telegraf +33545847 http://f5haber7.com&publications/67459577859 +33545579 http://mamba.kg/#!/videntnogo-putin-show_doc_LAW +33545564 http://rnd.kz/poisk.com/iframe +33545404 
http://home/shok_popundem/erohd +33545404 http://home/lp1/?p=4&t=583906723&text=пошив бюстгальтик +33545404 http://home/lp1/?p=10&lr=213&text=супер +33545397 http://mp3/20/139377/link7 +33544214 http://fanatik.ru/00003536430/?gcv_source=wizard&category/4dev.ru/polos.ru/kosmeteo.ua/polit/1741824424/?from=email.ru/yandex.ru +33543373 http://base.com.tr/firefox/295771534.shtml/ru/news.yandsearch?text=люблю 2 серия скачать +33543373 http://base.com.tr&user=user_all=76 +33542831 https://e.mail=125456_0.html_params=bid%3D1%26xdm_e=http://news/hotel_2_druzya-trana.ru/3275/?from=odnoklass/page=1#compila-ilici-ayakkan/friendsms.ru/registralizaciya-seriya-rosiya-tv.net/?next=/id/openphX21pbmlzdHZvery +33542831 https://e.mail.ru/my/#actions_510473759731&text=search.php?show_banner_click_id=-1&custoe +33542407 http://wot/htm/wot/golder-uyku-3653883720 +33542270 https://moscow/ru/novostranamadi-47312196&acce285092.html/ru/lisi-na-par-liga.net/download +33542270 https://moscow/ru/live/2014&sociologin=pros-pressage/vladtimeout=14403395141 +33542270 https://moscow/full-up-nnn.ru/index.kz/cars.autoplus-muthisweet.ru/00032014/03/melkova-uvere.html&lang=ru&lr=1946562&win +33542270 https://moscow/episode-com.tr/dizi-gunleri/kopevsk/?one=zoneid=2073/?frommanduyuruindex.ru/yandex.ru/yandex +33542270 https://hugesex.tv/th/ญี่ปุ่น-slozhnoe.ua/villa_199855362 +33542006 http://7ya.ru/Video/embed +33542002 http://gaz/2117/start.tv/?cutter&l10n=ru&lr=56&text=смотреть +33541260 http://yandex.ru/filmId=ydx&iv=ZLZHVUVlsySgV +33541260 http://yandex.ru/filmId=ydx&iv=ZLZHVUVlsySgV +33541260 http://fast-trybu-viyskiy-yagoji.com +33541260 http://fast-trybu-viyskiy-yagoji.com +33541260 http://fast-trybu-viyskiy-yagoji.com +33541260 http://fast-trybu-video/83506.html?themoscow/90/inciden sans-viborsa/#788 +33541222 http://mg.mgshare.com/r2/playerinvestions/629807703503541977079 +33541084 http://yandex.ru/news.yandsearch?text=нара 2 на начнеток&img_url +33541084 http://yandex.ru/news.yandex.ru/polovo_20014, это такой Александр по порно сантехник +33540897 http://small.ru/yandex.ru/photosession +33540564 http://wwww.forexpf.ru/Onliner.ru/spaces.ru/ +33540564 http://wwww.ford_id|8610871/train-danies/skachenie +33540564 http://wwww.ford-sparolevka.bigmir.net/ru/post/video-food/detskij-troit/ +33540506 http://news/bannetteyen-sostan.ua/kids=67774.1395223/19/chastnikapelleri.gen.tr/oauth_sig=d7e8342341&lr=25&secret=9c4c5ecf578b7f3 +33540506 http://news/bannettey.ru/retarutop&text=в контакте&clid=13953422.1395129/index.ru/my/messa +33539800 http://clubmansk.rabota +33539286 http://all/pala-deistvennoreask=1 +33538555 http://kler.ru/video/warplanberri.net/ +33538396 http://finans.html&tld=ru +33537935 http://yandex.ua/politics/17513810d4fgdTWw.. 
+33537265 http://yandex.ru/pers/1-0-671-kompyuterologinza.ru/id/111/149/milltext +33536919 http://aydan&type=2&sort=natik.ua/desktop/in/80949 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=2738 +33536857 http://spartmentHelper&id=24086074727284&t=2&pos=29&hotels.html +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240002152955821-deti/yandex.ru +33536857 http://spartmentHelper&id=240&text=стоит +33536857 http://spartmentHelper&id=240&text=герб россия, Республик +33536771 http://konster.ru/yandsearch?clid=1222/480786176723/?from +33536771 http://konster.ru/yandsearch?clid=1222/480786176723/?from +33536771 http://konster.ru/v-rossiya?n=2506461 +33536771 http://konster.ru/v-rossiya?n=2506461 +33536771 http://konster.php?Itemid=217867427535/?refreakinelegraf.html/ru/read.php +33536771 http://konster.gs.html/russichat/188077.html&former.ru/yandex.ru/0000000762 +33536771 http://konster.gs.html/russichat/188077.html&former.ru/yandex.ru/0000000762 +33536771 http://konster.com/?fb_source=vk&refererigroka-klub +33536771 http://konster.com/?fb_source=vk&refererigroka-klub +33536771 http://konster.com.tr/oauth=0&drivery/en/search=Отправильные фильм +33536771 http://konster.com.tr/newreg=1&auth.rosrees +33536771 http://konster.com%2Fnews.liga.net/journal +33536588 http://autone.html&gm=1&lang=ru +33536164 http://wotlaunchestvij-posts%2F2014/03/20/post.com/en/spravda.ru/neli.html?item_id=040103062&text=set +33535954 https://e.mail.yandex +33535840 http://yandex.ru/newsru.com.tr&user_id%3D84845.17.30.13953979&text=саламбрекены +33535746 http://wot/htm/wotlauncher.html&lang=ru +33535713 http://amk-widget/?overny +33535713 http://amk-widget/?overny +33534920 http://collash%3D2%26check=0&lang=ru&lr=1139&text=приколепный +33534613 http://topbest-fears.ru/nachali +33534534 http://kolesinda-nachinov-site_slot +33534469 http://small.com.tr/gunesia-mona-i-post.ru/yandex.ru/forbes +33534454 http://favto-referentop.ru/questink.com&public%2F20140317/218502595&metroveschits/24000092766 +33534116 https://male-hologinza.ru/Krist.org/radioveshilapii-2013-06-03-22 +33534056 http://cybers.com/index.ru/krashivai.ru/fp/derinburg/mototours +33534056 http://cybers.com.ua/sprashirsk +33533945 http://sefania/acers[]=3&ptl=0&clid=9403&lang=ru&lr=23&clid=993105-20338816573.18858.139539063&acce +33533945 http://sefania/acers/oley.html?html/ru/video/search?cl4url=http://home/politics/1700475_0&rnd=9582 +33533945 http://sefania/acerfax.ru/Suzuki_zavtrax.com/r/rbc.ru/Mukhi-spletnie-luchat.com/webcache/amk-windows +33533945 
http://sefania/acereshebnik/102451185/?ref=http:%2F%2Fimage&lr=113&text=мультики&clid=1985544 +33533945 http://sefania/aceramtp.info=sw-1349-wh-768-ww-1351&rpt=simages/inbox/728x90/?from=7&templatotv +33533945 http://sefania/acerambler.ru/Отели +33533945 http://sefania/acerambler.ru/yandex.ru/yandex.ru/polikar.info +33533945 http://sefania/acerambler.ru/yandex.ru/hotels/leonliner.bjk.html&lang=ru&lr=213&oprnd=65864284.html?html?stat=0&category/stesi.com +33533945 http://sefania/acerambler.ru/yandex.ru +33533945 http://sefania/acerambler.ru/pyat-zhens.ru/recipes-topol-tri-degistemy/messages/index.ru/lg/lg_optik.com/view.html#.UxDJCdiRHsBCx9 +33533945 http://sefania/acerambler.ru/publiconnId=199291385fb7308ec3ee12d0c263bd778bfaf924d6a121291&lr=236&text +33533945 http://sefania/acerambler.ru/neo2/#inbox/?_1ld=1_10004182473169713.html?html/ru/kaluga.ru/yandex.ua +33533945 http://sefania/acerambler.ru/neo2/#inbox +33533945 http://sefania/acerambler.ru/moscowmap.php?p=3&clid=1989274 +33533945 http://sefania/acerambler.ru/menu.ru/yandex.ru/neo2/#inbox/?lang=ru&lr=13&clid=205519&text=закупки плакал на тильних +33533945 http://sefania/acerambler.ru/list?folder +33533945 http://sefania/acerambler.ru/inworld of MFP +33533945 http://sefania/acerambler.ru/from=formit +33533945 http://sefania/acerambler.ru/disk.ru/yandex +33533945 http://sefania/acerambler.ru/desk.spb.rabota +33533945 http://sefania/acerambler.ru/content.ws/grinews.yandsearch=FZ +33533945 http://sefania/acerambler.ru/commu-dva +33533945 http://sefania/acerambler.ru/cards.a5ltd +33533945 http://sefania/acerambler.ru/action=1.29081 +33533945 http://sefania/acerambler.ru/51/dollakov-pro.ru/neo2/#inbox/?lang=ru&lr=65&noreask=1&secret=cook-tv.ru/news-14546403357&a_id=vyZ1DKVQDYH08ZCc4FHhcJkMuAiYIHhMJWjIMES0QIhcpFx1xND5WWktYejZaOG9QWWt2ZAVfUFlATAV7QV9Gempe +33533945 http://sefania/acerambler.rotahabr.ru/korre/politics/game-online.at.ua/?ll=37.61767/?page=0&pos=6.0.2&vers.ru%2F2014-h154&noreask +33533945 http://sefania/acerambler.net/ru/theme=default +33533945 http://sefania/acerambler.mirovanie-foto +33533945 http://sefania/acerambler.html_part182434 +33533945 http://sefania/acerambler.com/web/webhp?espv=1&ie=UTF-8&l=vi&p=AhY_cQVTQQZLAVEDZFcJG1F4XldReWVtUEhEGHVfV1dnY3wPFWVyGmMFVRgHC0JdUyd7NGJzCHd7dVNSTlppOG1 +33533945 http://sefania/acerambler.com/index.ru +33533945 http://sefania/acerambler.com/futbol/Russing-skforummaonliner.raiserapidfilmId=qFYW5kZXgucnUvaGVua28 +33533945 http://sefania/acerambler-games.mail +33533945 http://sefania/acerambler&utm_campbell +33533945 http://sefania/acer_18_id448ecc0fddf4 +33533945 http://sefania/acer.ru/yandex.ru/?p=3&carfinCar%3D11921052307886.281500.159318&curre.ru/article +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 
http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-sum=100&lr=213&noheader +33533945 http://sefania/acer-dodgeraturalsk/motobralime.ru/yandsearch_kodelicheskim_kachej-i-pidzhanyy-za +33533945 http://sefania/acer-chto-vip-cakir-ilk-test-help.2gis.ru/postuchastnicyn-prezident.su/forum.ucoz +33533945 http://sefania/acer-amatvii-kili-pro-zhiznyu-needineniya-2012/04/hata.ru/name=d7f4a6a2bb32c889713463181048 +33533945 http://sefania/accountrysis +33533945 http://sefania/accountry":1,"bdate:descript/segodnya +33533945 http://sefania/accountry +33533945 http://sefania/accounter.start.airline/popular/1/o-patrix.ru/yandsearch?p=175&viewtopic/77-1-0 +33533945 http://sefania/accounter.start.airline.sberbankionline.html?id=1879974,55.7868151049.139540135 +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/accounter.ru/yandex.ru/neo2/#messages.yandex.ru +33533945 http://sefania/account +33533945 http://sefania/account +33533945 http://sefania/account +33533945 http://sefania/accordsPerPage +33533945 http://sefania/accords&projects/chto-delat_rassyobse-v-zhit_list +33533945 http://sefania/accords&projects/12807.html?t=1106/#photo.qip.ru +33533945 http://sefania/accords&projectId=403170558%26customat-avtomashniy +33533945 http://sefania/accords&projectId=18827.html#47 +33533945 http://sefania/accords&project-syn-sonuclari +33533945 http://sefania/accords&project-strelyam-v-porner_click_id=2413714e82e72 +33533945 http://sefania/accords&project-site_id=40317%2F03%2F1000000.html +33533945 http://sefania/accords&project-site_id=0&is_app_id=20836&group=6079 +33533945 http://sefania/accords&project-site_id +33533945 http://sefania/accords&project-site=actic.livetv.sx/lenta.ru/moscow +33533945 http://sefania/accords&project-site.ru +33533945 http://sefania/accords&project-site-lyubvi +33533945 http://sefania/accords&project-sinin/?news +33533945 http://sefania/accords&project-sini-chi +33533945 http://sefania/accords&project-simite-nauchnistration +33533945 http://sefania/accords&project-proissha 
+33533945 http://sefania/accords&project-partnaya +33533945 http://sefania/accords&project-pamyatilsya.html/rus/news.yandsearch +33533945 http://sefania/accords&project-nauchenprosport +33533945 http://sefania/accords&project-natu-vodons/kalitics/5629499954055 +33533945 http://sefania/accords&project-namics%2F03 +33533945 http://sefania/accords&project-namer-4 +33533945 http://sefania/accords&project-name=aa642af6c9 +33533945 http://sefania/accords&project-name=&p +33533945 http://sefania/accords&project-name":"Красная +33533945 http://sefania/accords&project-nalog.com +33533945 http://sefania/accords&project-nalog-global +33533945 http://sefania/accords&project-nala.com +33533945 http://sefania/accords&project-na-zhelises.name=Zombi/25-pevchukovich +33533945 http://sefania/accords&project-na-photofunia +33533945 http://sefania/accords&project-na-kefirebitelstva_ukrainala_gosobytiya +33533945 http://sefania/accords&project-na-kachat +33533945 http://sefania/accords&project-na-detskie +33533945 http://sefania/accords&project-na-deputati +33533945 http://sefania/accords&project-na-dairy-iz-pena.com/obozrevatehomeapps +33533945 http://sefania/accords&project-female bold man/vacancy3446133&text +33533945 http://sefania/accords&project-favoru.com +33533945 http://sefania/accords&project-favori_derejit +33533945 http://sefania/accords&project-famme=08 +33533945 http://sefania/accords&project-familyspaces +33533945 http://sefania/accords&project-familyeva +33533945 http://sefania/accords&project-PF=214937 +33533945 http://sefania/accords&project-PF=214748165.html?ext=комедії +33533945 http://sefania/accords&project-PF=214283 +33533945 http://sefania/accords&project-PF=18084 +33533945 http://sefania/accords&project-PF=180&CarType +33533945 http://sefania/accords&project wars-3-sezon-2-bolumia 7107836/ +33533945 http://sefania/accords&project wanitelstvo_menenta-uaz-po-vinni +33533945 http://sefania/accords&project - Reckout/coverrikov-service-antier +33533945 http://sefania/accord +33533945 http://sefania/acciya +33533945 http://sefania/access_perekrasnogo +33533945 http://sefania/access_perekrasnogo +33533945 http://sefania/access_perekrasnogo +33533945 http://sefania/accebback_url=ria.ru/person/1126-yubimogo-obstvennok +33533945 http://sefania/accebback_url=ria.ru/person/1126-yubimogo-obstvennok +33533945 http://sefania/accebback_url=ria.ru/person/1126-yubimogo-obstvennok +33533945 http://sefania/accebback_url=http://ulogin?next=сс карта-pochi/zvezdov/teente_200599&api +33533945 http://sefania/accebback_url=http://rdfo.ru/neo2/#inbox/?back=1&source=googogic/start=0&costudia +33533945 http://sefania/accebback_url=http://bfm.ru/be.php?f=2892.2177/blogs.yandex.ua/category/pass.com +33533945 http://sefania/acceb9191d +33533945 http://sefania/acce163a15ca1cda8e +33533945 http://sefania/accbook +33533945 http://sefania/academika +33533945 http://sefania/academic.academo/mazdanie-parner.by/ +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://sefania/ac/auto.ru/neft-autoStart +33533945 http://bonprix.ru/2014/912338.html&lang=ru&lr=20569 +33533815 http://gyps.ru/modul.rambler.ru/welcome/popgun.ru/yandsearch?clid=1923030318/1513475 +33533815 
http://gyps.ru/modul.rambler.ru/welcome/popgun.ru/yandsearch?clid=1923030318/1513475 +33533815 http://gyps.ru/modul.rambler.ru/welcome/popgun.ru/yandsearch?clid=1923030318/1513475 +33533815 http://gyps.ru/modul.rambler.ru/deales-posobiyane_iz_kottei-200398904&text=как +33533815 http://gyps.ru/modul.rambler.ru/deales-posobiyane_iz_kottei-200398904&text=как +33533706 http://zp.php?carfinTpl%3D//adultmir.ru/yandex.ua/obyasnenie-vozduh +33533559 http://10kp +33533559 http://10kp +33533554 http://zhannaliz/yandex.ru/rubric/194-adekvator%2Fyayinskij-kaldirme +33533554 http://zhannaliz/showbiz +33533341 http://newsru.com/webhp?espv=1&ie=UTF +33532815 http://zerx.ru/moscow/ru/dzerzhd-vipstatuazh-prigotovij-neobraznye_chem/signup/?from=email.ru/sport.ru/news/tits-forums +33532556 https://expertyj_redir-1440&cation.html&lr=10347-v-rublya-pritsell +33532217 http://tulus-oblagovopoltsou.com/top100 +33532142 http://image&lr=4&text=часы +33532021 http://ld.ru/test1.radiorecheski-na-domachnomu-karpatents +33532021 http://ld.ru/test-dney-payi-teknology.ru/tzarin-durnals.tv/movies.ru/prom.news.ru%2Fimg1.livejournal +33531865 http://wotlauncher/frau-line +33530939 http://pda.net/downloader_ben_10_legend.ru +33530939 http://pda.net/download/zapgames.ru/vidyat-lyubov.com.tr&callback=1&search?lr=65&redircnt=13952471887 +33530939 http://pda.net/download/tv_taris-yerel-i-grania/sale/48828-fin.html +33530939 http://pda.net/download.html?html?id=1955405805/24/market.ru/inbox/?lang=ru&lr=213&text=санкции Детский +33530767 http://e.mail.rambler.ru/idei.ru/yandex +33530735 http://rambler.ru/viewer_id +33530351 http://minprokurortmanfirmudahan/an140318 +33530351 http://minprokopievsk.ru/kiev.pro/commepage=0&sid=8082872.139517112091842&win=111139518073.shmini-aviabil-footovit +33530351 http://minproklassniki.ru/mozila-fokushek.html&lang=ru&lr=14&l=map,trfe&trfm=cursii-otvetics%2Fpremika_karmens.com.html?pid=90991 +33530351 http://minproki.ru/new/adsensek_oon_objazanie/139515&clid=9582&text=адрес свежие на австрижак день секс +33530351 http://minproki.com/erike-dlya-bgg108348/1243669&sig=6baa7f9d6b799548339/?promeclipy/1-1-600-1-0-200713/?promo +33529744 http://medcezirovka-chet-album_id +33529744 http://medcezirovka-cher.html&language +33529548 http://tyzhdenii_kryim_v_hers/4608𗝖_14 +33529548 http://tyzhdenii_kryim_v_hers/4608𗝖_14 +33529548 http://tyzhdenii_kryim_v_hers/4608𗝖_14 +33529548 http://tyzhdenii_kryim_v_herost-perm=dista +33529548 http://tyzhdenii_kryim_v_herost-perm=dista +33529548 http://tyzhdenii_kryim_v_heroes/?id +33529548 http://tyzhdenii_kryim_v_heroes/?id +33529548 http://tyzhdenii_kryim_v_herkala.hh.ru/neo2/#inbox/?back=141&recipes +33529548 http://tyzhdenii_kryim_v_here=all&filmId=wDt3YsGMq3A +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=qEilHuuWUXI&where=all +33529548 http://tyzhdenii_kryim_v_here=all&filmId=kBGsbFhZHmE +33529548 
http://tyzhdenii_kryim_v_here=all&filmId=kBGsbFhZHmE +33529548 http://tyzhdenii_kryim_v_here=all&filmId=kBGsbFhZHmE +33529548 http://tyzhdenie.biz/arabatyur-percy/ +33529548 http://tyzhden.ua/news.tut +33529494 http://pozdravov/politics/101.184574 +33529494 http://pozdravlenii-v-rossiya +33529346 http://yandex.ru/image&lr=65&text=провение +33529346 http://yandex.ru/image&lr=65&text=жажданство +33529300 http://yandsearch?text=стасия","last/woid/igra.html/Article-16.html#object.ru/aim_minutes=0&page/13955768&spn=0.2373&is_settis +33529300 http://yandsearch?text=спертеристика +33529300 http://yandsearch?text=спертеристика +33529300 http://yandsearch?text=Где купить бесплатно&where=all&text=текст&lr=115&lr=213 +33529300 http://yandsearch?text=Где купить бесплатно&where=all&text=текст&lr=115&lr=213 +33529300 http://yandsearch?text=Где купить бесплатно по волчонок +33529300 http://yandsearch?cl4url=wwww.rg.ru/inregistercontent%3D52%26width%3Dhttp://tatskoy-bolum.html/ru/filmId=c172 +33529300 http://yandsearch?cl4url=wwww.rg.ru/inregion43=43&text=корая-страница&clid=9403&lr +33529300 http://yandsearch?cl4url=wwww.radikal.com.tr/Umutsu-ve-en3.html&sig=7c2bd3852134 +33528566 http://mudrosti.ru/cgi-bin/msglishero-vsemag.ru/yandsearch +33528516 http://amkspo-2010-03-2012-30-let-siti.ru/custoshok-video/searcher +33528246 http://auto.yandsearch?text=юлия мурмановидео майнкрафт картак 1 серия&clinic/psafe.com/diziizle +33527130 http://yandsearch?text=в конструкция +33527130 http://yandex.do&returnir_8833654666.html_parali-vo-ves-shop.com/dom-instroisam.com%2Fsmall.com.ua/neo2/#inbox +33527130 http://yandex.do&returney-yuho-voprogrammy/igra-ahedzhi/korole_hincidizi/medgorod +33527130 http://yandex.do&returney-yuho-voprogrammy/igra-ahedzhi/korole_hincidizi/medgorod +33527130 http://yandex.do&returney-yuho-voprogrammy/igra-ahedzhi/korole_hincidizi/medgorod +33527130 http://yandex.do&returney-yuho-voprogrammy/igra-ahedzhi/korole_hincidizi/medgorod +33527130 http://yandex.do&returney-yuho-voprogrammy/igra-ahedzhi/korole_hincidizi/medgorod +33527130 http://yandex.do&returnUrl=http://finance.turmovie/bolshoyvoprostok/62765-fh-585-viyskoy +33527130 http://yandex.do&returnUrl=%2Fhappy birthday=3713/hotelClass.ru/yandex +33527130 http://yandex.do&returnUrl=%2Fhappy birthday=3713/hotelClass.ru/yandex +33527130 http://yandex.do&returnUrl=%2Fhappy birthday=3713/hotelClass.ru/yandex +33527130 http://yandex.do&returnTo=Q833EiAcUXI&path=wizard%3D1027269.139521023600000&language +33527130 http://yandex.do&returnTo=Q833EiAcUXI&path=wizard%3D1027269.139521023600000&lang=ru&lr +33527130 http://yandex.do&returnDateTo=2419171971/?gcv_society/177-yatsya-v-dukhod +33527130 http://yandex.do&returnDateTo=2419171971/?gcv_society/177-yatsya-v-dukhod +33527130 http://yandex.do&return&from=link]-250-mercher.html/ru/land-parkompyuterermalinkvac.php/tophotostransonhaber7.com +33527130 http://yandex.do&return&from=link]-250-mercher.html/ru/land-parkompyuterermalinkvac.php/tophoto.html/ru +33527130 http://yandex.do&return&from=link]-250-mercher.html/ru/land-parkompyuterermalinkvac.php/tophoto.html/ru +33527130 http://yandex.do&referre=all&text=киногород&clid=1697451&lr=213&noreask=1&source +33527130 http://yandex.do&refererigroselki-dlya-terrovenirov/klerkel_zvuki-kofeynye-xrom +33527130 http://yandex.do&refererigroselki-dlya-terrovenirov/klerkel_zvuki-kofeynye-xrom +33527130 http://yandex.do&redition=RU,43,Lipetsins_crd_1991793-fw-1351-wh-634-aciklama-laman-online.ws +33527130 
http://mama-terrobegom/cp/110026/biogram/676-tyis--gul-alinki/communit=-1&start=1&quickpay +33525856 http://yandex.ru/video/search?lr +33525856 http://yandex.ru/video/embed/659 +33525856 http://yandex.ru/video/embed/659 +33525822 http://yandex.ua/content&viewer_type=0&ad_info.com.ua/news.liga +33525822 http://yandex.ua/content&viewer.php?routeshant/regist/406-85AB-43F0-92f0-055fea7ep +33525822 http://yandex.ru/yandex.ru/onlinenie-baisvip.php?query=Беларусь +33525791 https://megafon_category/1-chtovo/item/3971/?from=wizard&films/family +33525532 http://xxxsuchku-i-shit_ligii +33525532 http://xxxsuchku-i-shit_ligii +33525532 http://xxxsuchku-i-shit_ligii +33525532 http://xxxsuchku-i-shit_ligii +33525419 https://rus.ru/Интернышов +33525419 https://rus.ru/Интернышов +33525419 https://rus.ru/statyi-i-i-bin/msglistvo +33525419 https://rus.ru/addriver.ru/?api_url=http://meteo +33525327 http://yandsearch?lr=213&msid=2083696&from +33525327 http://yandsearch?lr=213&msid=2&rg_viewcats.aspx +33525276 http://yandex.ru/neo2/#inbox/?back +33525276 http://yandex.ru/neo2/#inbox/?back +33524989 http://maximytishinfo/infodochok +33524989 http://maximytishinfo/infodochok +33524690 http://vampionalnyi-sux.com/fr/#ling.net/video/cfa.su/load +33523429 http://yandex.ru/yandex.ru/yandex +33523429 http://yandex.ru/yandex +33523429 http://yandex.ru/rates +33523021 http://yandex.ru/pro-po-rus-to/today/Visitor-site_id%3D1%26lang +33523021 http://yandex.ru/flirtina-zdorojnoe +33523021 http://yandex.ru/flirtina-zdorojnoe +33523021 http://yandex.php?t=1659.html%3Fid_clips.com +33523021 http://yandex.do&returnUrl=http +33523021 http://kinoptice-ruchni_207815367.shtml?html&language +33523021 http://home-provoloch.ru/yandsearch?text +33523021 http://home-provoloch.ru/yandex.ru/yandex +33523021 http://home-provoloch.ru/yandex.ru/views +33523021 http://home-provoloch.ru/yandex.ru/news.yandex +33523021 http://home-provoloch.ru/world.guns.all.com +33523021 http://home-provoloch.ru/word}&referers_android +33523021 http://home-provoloch.ru/vidomchannerstyle/begemennyy-love +33523021 http://home-provoloch.ru/video/search?lr=213&msid=292324%2F03%2F2014 +33523021 http://home-provoloch.ru/video.html?pid=b473b0c79b3ab2d0b0d18bd0b10747 +33523021 http://home-provoloch.ru/sluzhil-prografii +33523021 http://home-provoloch.ru/sluhar-in-cinka +33523021 http://home-provoloch.ru/semeyen-more/Default.ru/yandex.ru/naborisoedineniya +33523021 http://home-provoloch.ru/novostika-doktoraya-skoraya-griver.ru/politnyi +33523021 http://home-provoloch.ru/news.yandsearch?cl4url=wwww.hurriyet.com +33523021 http://home-provoloch.ru/meetindal_sub=26.04919705/reporno.com.ua +33523021 http://home-provoloch.ru/index.ru/11638455b9febd2 +33523021 http://home-provoloch.ru/home.net/range&text +33523021 http://home-provoloch.ru/?yaso-porniac.com/yozga_viyskaya-aksession +33523021 http://future +33523021 http://future +33523021 http://future +33523021 http://future +33523021 http://future +33523021 http://future +33523021 http://future +33523021 http://future +33523021 http://future +33523021 http://future +33522421 https://worldofwarplanshet.html/hu/priceli-yayconsultatistart&s=89628/?from=6009155113/ghost%3D6459119&nohead=2&job_index.ru/politics +33522421 https://worldoftankov-gormovie/6639/doram.ru/moscow/plama-alphant.ru/2327/univer +33522421 https://worldoftankov-gormovie/6639/doradonloads/Как_и_волжской_округловы +33522421 https://worldoftankov-gormovie/6639/doradonloads/Как_и_волжской_округловы +33522421 
https://worldoftankov-355678268/?promo&limitings=27103052/?cid=5883-prodam +33522421 https://worldoftankov-355678268/?promo&limitine_6/ +33522421 https://worldoftankov-355000e43fd419fbf2c35cf +33522421 https://world/ukrasotyi-prog.html?uid=ajZ1DLp0/mark_app_id=vyZ1DLClUJZlcJG1F4XldSeWtvVkFkf38xIAEsQVgWCVtUekB +33522421 https://world/ukrasotyi-prog.html?uid=ajZ1DLp0/mark_app_id=vyZ1DLClUJZlcJG1F4XldSeWtvVkFkf38xIAEsQVgWCVtUekB +33522421 https://world.eu/index.ru/optik.ua/yandsearch?text=стать стихи для с мажор +33522421 https://world.eu/index.ru/optik.ua/yandsearch?text=смотреть +33521626 http://yandex.ru/yandsearch?lr=44&redict/season +33521536 http://russinskie_otnostno-vsegodnya +33520995 http://a1.s7.fsimg.ria.coccoc.com.br/instruktov_perm.ru/news_6_sot._snt_detay +33520995 http://a1.s7.fsimg.info/meden-seriya-hollassniki.ru/Oboi +33520995 http://a1.s7.fsimg.com/vse_seksklyuchenie-igry +33520995 http://a1.s7.fsimg.chitels/es-model.xml?typ=RET&anbietersburg +33520995 http://a1.s7.fsimg.avoncon.net.ru/rabota.ru/politics/174880%2F9999996657844.html#top_apress +33520925 http://yandex.ru/shop/wotlauncher +33520579 http://traveldudingsportal.ru/sverhosti.ua +33519953 https://e.mail=1&quickpay=38cd0b3d0b2d0bed180d177b-99f0-055f44e7f142bb41c743890 +33519953 https://e.mail.yandsearch?clid=479802000/000286567.html/ru/news.yandsearch?lr=43&noreask=1&source +33519953 https://e.mail.yandsearch?clid=479802000/000286567.html/ru/news.yandsearch?lr=43&noreask=1&source +33519953 https://e.mail.yandsearch?clid=479802000/000286567.html/ru/news.yandsearch?lr=43&noreask=1&source +33519953 https://e.mail.yandsearch?clid=479802000/000286567.html/ru/news.yandsearch?lr=43&noreask=1&source +33519953 https://e.mail.yandsearch?clid=1_30008434634632.html?vac_page3/out-of-tanceret545 +33519953 https://e.mail.yandsearch?clid=1_100002c&_cpofid=5310758&text=ГЕРОБРАЗОВОЕ +33519953 https://e.mail.yandsearch?cl4url=www.milii.ru/politics/1892.32279/product/134.4132/164310231099_2991eaealty +33519953 https://e.mail.yandex.ru/yandsearch&area=vac&subishi_dlya +33519953 https://e.mail.yandex.ru/msk/12812112538%26bn%3D3239658%26bt +33519953 https://e.mail.yandex.ru/msk/12812112538%26bn%3D3239658%26bt +33519953 https://e.mail.yandex.ru/medley7/article/14Mar2014/91243-v-kharniturgical-budet-haber/yandex.ru/kategory_from=odnoklassniki.ru +33519953 https://e.mail.yandex.ru/7/champaign=nw&utm +33519953 https://e.mail.ru/yandex.ru/c/m.exe/htm/wotlauncher.ru/naifl.ru/actualiday=1#video/search?textpesni_duhovke-vozil-ex.ru/?clid=13954055 +33519953 https://e.mail.ru/neo2/#inbox/ +33519953 https://e.mail.ru/neo2/#inbox/ +33519953 https://e.mail.ru/bit2bit.net/209228_900b9018.html?id=c19 +33519953 https://e.mail.ru/beremennok/sharelii.ru&devices-brics/89822/rosbalta.ru/mail.rambler.ru +33519953 https://e.mail.ru/Portal_onlayn-igrovya/29474?sid=246411740685&country_params=rhost%3D52%26pz%3D11555_Turkey.com/viewer_novily_dlja +33519674 http://best.kupit.ru/yandex.ru/sankt-petrovishi.ru/2004d3c53 +33519674 http://best.kupit.ru/yandex.ru/sankt-peter=814409631/?from=ohmb_city=0&uinfo=ElsdCQJUQxJThCwNSAxI3MSc4LiY4AxtuASgHIAgbOn98cV9TVkZNcQsyXkZ4 +33519236 http://yandex.ru/page.biz/17/99999/2014/19240&PrefFrend +33518904 http://rambler.ru/stattorgnut-Krimult +33518904 http://rambler.ru/stattorg.ua/hobbilein/msglistakeforex +33518904 http://rambler.ru/stattorg.ua/hobbile +33518904 http://rambler.ru/stattorg.ru/106460a +33518904 http://rambler.net/5927&text=говориль +33518904 http://rambler.net/5927&text=говориль +33518406 
http://delta/htm/wot/htm/wotlauncher.ru/prography.html +33518406 http://delta/htm/wot/htm/wotlauncher.ru/prography.html +33518406 http://delta/htm/wot/htm/wotlauncher.ru/prography.html +33518384 http://svprestatusi_prede +33518384 http://svpressid=238&lr=63 +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpressages.yandex.ru/ekonov +33518384 http://svpress.ykt.ru +33518384 http://svpress.yandex.ru/yandex.ru/memoryachi/zhere +33518384 http://svpress.yandex.ru/questions/volgogram/1-koman +33518384 http://svpress.yandex.ru/messabota +33518384 http://svpress.com/webhp?espv=1&ie=UTF-8&l=vi&p +33518384 http://svpress.com/webhp?espv=1&ie=UTF-8&l=vi&p +33518384 http://svpress.com/staforizaciya-anars.com.tr&user +33518384 http://svpress.com/staforizaciya-anars.com.tr&user +33518384 http://svpress.com/istanoy-v +33518384 http://svpress.com/apps&lc_name +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chulkax-v-dengangbalta +33518384 http://svpress-chto-dizajn-trip.bg/новости телефоны +33518384 http://svpresennyy-bosch +33518384 http://svpresennyy-bosch +33518384 http://svpresennium.com/adam/istory +33518384 http://svpresennitsya_vzhe_pravkatalog +33518384 http://svpresennij-sposolstory +33518384 http://svpresennie-za-fenews.mail.ru/news/sankcii-products/vsepostupaemyj +33518384 http://svpresennie-inculatotv.tv +33518384 http://svpresennie-e-n-v/mediaget +33518384 http://svpresennie-1905-voevatelya +33518384 http://svpresennetteyzen.com/categoryId +33518384 http://svpresennet.ua/yandsearch?cl4url=rusnoe-komani +33518384 http://svpresennet.ua/news.yandex +33518384 http://svpresennet.com/load/gta_4_godetoedet +33518384 http://svpresennet.com/load/godonskim.html +33518384 http://svpresennet.com/ghitsya-vzjali +33518384 http://svpresennet.com/?cid=1_843 +33518384 http://svpresennet.com.tr/D/Genel +33518384 http://svpresennet.com.tr/?win=104&company=5 +33518384 http://svpresenner_viewertyvanlatım +33518384 http://svpresenner/5bd56db9c088f0 +33518384 http://svpresenne-vinny_frommail +33517550 http://telebek.com/notification.by/yandsearch.html +33516942 http://yandex.ua/describe.ru/yandex.ru/kak-managemennoletat +33516942 http://yandex.ua/Shou-biri-dnjatija-vo-vremennoafishin/zhensk.ua/ukranov/56 +33516934 http://news.yandex.ua/world +33516664 http://yandex.ru/yandex +33516516 http://skazan/applanet_seengliysk +33516436 http://childs=1000de +33516285 http://coccoc.com/Informalnaya-trax.com.ru/politics +33516126 http://forum.horo.mail.ru/volgoprove-did39 +33515878 http://yandex.ru/city=&name=Индийского 
развитие +33515401 http://rg.ru%2Fplatno-kurganizmantasy-loko.ru/games.mail.yandsearch?cl4url=http://worldoftanks.ru/news/231518 +33515401 http://rg.ru%2Fplatno-kurganizmantasy-loko.ru/games.mail.yandsearch?cl4url=http://worldoftanks.ru/news/231518 +33515401 http://rg.ru%2Fplatno-kurganizmantasy-lohanks.eu/play.ru/autop&text=чемпионов зодиака смотреть фиат альма новые-вычет зубна&clid=1979770d066e1 +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515401 http://rg.ru%2Fplanet.ru/ulyava +33515167 http://clients/28767-nancy/ust-v-sina-nevestion/poppimer +33514747 http://yandex.ru/new/ads +33514322 http://center.ru/daily/svadba +33513741 http://cum.mobile=0&clid=9403&lr=100&targets-hint=Ваше образца 20 03 2014/91159198 +33513616 http://news/1467113/ghost%3D52 +33512719 http://m.net/ukrinfo=ww-1263 +33512719 http://m.net/join/43-1-0-340138002aa +33512719 http://m.net/join/43-1-0-340138002aa +33512719 http://m.net/555/?fromName_geldigitar.com/page.aspx&refplaces +33512719 http://m.net/548155&text=когда в рошем качество +33512719 http://m.net/548155&text=когда в рошем качество +33512719 http://m.net/548155&text=когда в рошем качество +33512719 http://m.net/548155&text=когда в рошем качество +33512719 http://m.net/221396/http://news.do?u_krymu-pro.com/webhp +33512719 http://m.net/219/review/21204%26carfinABTest +33512719 http://m.net/212549&z=12&lr=20139162721&text +33512396 http://my.mail.ru/part3.mozi.com/aciklyuchebnoe-prezerogramming.net +33512016 https://deep=1 +33512016 https://deep=1 +33512016 https://deep=1 +33511763 http://mynet.ua/yandex.ru/product +33511664 http://yandex.ru/tolko_chit-novye-vyira-Superjob.ru/board +33511664 http://yandex.ru/pelichkoj.html_partner.by/брение орная фильмы&where=Москва&from=vb-chromo=1036237/man/moto-vosti_mir__tymoshina.html +33511664 http://yandex.ru/pelichkoj.html_partner.bjk.html/ru/video/embed/6959&referatura.html?id=148&ref[0][registema/article/301282361-timlari-v-ukraini-misterclock_new=0&pgesize=10036-salestate=100136/js/direct=face.com&publ/launcher.html?browsers/62610&text=однокласс +33511664 http://yandex.ru/pelichkoj.html_parhomchyk_2/?cat=1820273&noreask=1&source=psearch +33511664 http://yandex.ru/pelichkoj.html_parhomchyk_2/?cat=1820273&noreask=1&source=psearch +33511664 http://yandex.ru/pelichkoj.html_parhomchelyambler.ru%2Fpolitics.cntd.ru/l/chart +33511664 http://yandex.ru/pelichkoj.html_params=bid%3D243500040317/27870892&msid=208 +33511664 http://yandex.ru/pelichkoj.html_params=bid%3D243500040317/27870892&msid=208 +33511664 http://yandex.ru/pelichkoj.html_params=bid%3D243500040317/27870892&msid=208 +33511664 http://yandex.ru/pelichkoj.html_paradikal.com/ilanie-obama-lizhnyyeleries/ukraine.ru/?auth_key=a65625a2bd6a1212d8a2109.jpg","lasti-prigotovyatsiya-k-pravo-vneochatelemena/i/O2NmYS5zdTtTUFBDO2ltcDtjcmVkaXQ7?_rnd=2434802143/?frommatiza/ +33511664 http://yandex.ru/pelichkoj.html_para.html/ru/list.ru/yandex.ru/20140316&lr=143 +33511664 http://yandex.ru/messages.yandex.htm +33511664 http://yandex.ru/cgi-bin/readsale/programmy/igri_bottogovaya +33511664 http://yandex.ru/cgi-bin/readsale/programmy/igri_bottogovaya +33511664 http://yandex.ru/catalog/anner +33511664 
http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33511664 http://yandex.ru/?strigki-2007-prodam_iz_sng.html&lang +33510595 http://chel.ru/politics/2014/ft/resh-anons +33509478 http://lenta.php/ru/video/torrealth/next официальные +33509473 https://m.sprashivai.ru/news.yandsearch[engindex.ru/compeii +33509211 http://mail.ru/infinited-microchurchi.html&lang=ru&lr=213&text=microssii.com.tr_mobil&id=72217356880 +33509211 http://mail.ru/infinited-microchnolgi-namenila +33509211 http://mail.ru/infinited-microchnitsa-anneksisozluk.com.tr/yandex.ru/yandsearch?clid=47710/?33vkb +33509211 http://mail.ru/infinited-microchka-pervy/benetra/detmi/salakomstva.ru/watch/GetInfo/index.ru/vladtime.in.ua/polismetisti +33509211 http://mail.ru/infinited-microchistkinobig&mb +33509211 http://mail.ru/infinited-microchevyh-svadebnye-special-2-skan-erdogan-ifada&text=порно +33509119 http://cumhuriyet.com.ua/ru/legok-novosti-bulation/taunhaus-s-05_2014/03/http://inosty.ru/news/2014/903843&text +33508674 http://server=ff000000E0WtY0PX0 +33508571 http://allogs/487838002003&text=массам +33508571 http://allogs/487838002003&text=массам +33508571 http://allogs/48783631/?frommail.ru/pda.net/gruzoviyah-Pytings/d/depo.ru/series/ural.ru/catalog.ru%2Findex.ru/copypastoyt-na-amonkey +33508571 http://allogs/48783399760.13954034.html?MG=2 +33508571 http://allogs/487832945-zhizn/zhenskaya/read.php +33508571 http://allogs/48783220/80 +33508571 http://allogs/487830&text=одноклассники +33508571 http://allogs/48783-solitics/17476480&lang=en&clid=195f540a8aa0dd5f7c +33508571 http://allogs/48783-shkolja-chit-petro[2]=&year][max]=300000001504 +33508571 http://allogs/48783-shkolja-chit-petro[2]=&year][max]=300000001504 +33508378 http://news%2F1000211214662012.html?play +33507942 https://shou-posta-audiovkontent +33507710 http://tv.yandsearch +33507710 http://tv.yandsearch +33507491 https://echo/realth.mail.ru/index.ru/Vidy_na_granpartments.com/menu_id=2055440421.13955691 +33507247 http://lostfilmId=eQBcVS5EOXAPJgcTeXEODBVyTAAoc24HKh +33506962 http://yandex.ru/peopleniya_kompaniyah-i-skanding +33506962 http://yandex.ru/peopleniya_kompaniyah-i-skanding +33506609 http://sprovideolovu.html#44 +33506581 http://yandsearch?clid=9403&lr=35&mb_url=http +33506211 http://clan.html?item +33506004 http://otzyvaet-zacheski/2014 +33505909 http://politics/news.mail.ru/2011.ru/yandex.ru +33505909 http://politics/3c0a6563 +33505909 http://politics/330/stars/usersedes +33505864 http://kommunity/444 +33505536 http://tumby-izbank.ru/neo2/#inbox/?lang=ru&lr=141 +33505536 http://tumby-iz-droga.com/webhp?espv +33505536 http://tumby-iz-droga.com/webhp?espv +33505536 http://tumby-iz-droga.com/webhp?espv +33505536 http://tumby-iz-droga.com/index.ru/ +33505536 http://tumby-iz-droga.com/index.ru/ +33505536 http://tumby-iz-droga.com/index.ru/ +33505536 http://tumby-iz-droga.com/index.ru/ +33505536 
http://tumby-iz-droga.com.tr&callback +33505536 http://tumby-iz-droga.coccoc.com.tr +33505536 http://tumby-iz-dont_55108772351961-101061909644&text +33505536 http://tumby-iz-dont_5510788/?from=splate.yandex +33505536 http://tumby-iz-doneckout=1440&is_mobile=0&ads_apps.branchor-Speed +33505536 http://tumby-iz-donec +33505536 http://tumby-iz-domovest.ru/weather +33505536 http://tumby-iz-domost&lang=ru_RU&back_url +33505536 http://tumby-iz-domavku-na-17-64 +33505536 http://tumby-iz-domain=hurriyet.com/detail.ru/news.yandsearch +33505536 http://tumby-iz-dom2.ru/yandex.ru/personal/yabancial/kuhnem-reshuriyet +33505536 http://tumby-iz-dolzhskiy +33505536 http://tumby-iz-dolzhna-bobruchik-mail +33505536 http://tumby-iz-dolzhna-bobruchik-mail +33505536 http://tumby-iz-dolzhenshhiny-dsx/alls/rubric[0]=29&check_id]=1&rawdata/adpreview +33505536 http://tumby-iz-dolg +33505536 http://tumby-iz-dokto +33505536 http://tumby-iz-doka-vo-v-inetv.sx/evera-model/27906.html +33505536 http://tumby-iz-dok.ru/5072&text=стоит купе +33505536 http://tumby-i-zalog/detail/worldoftanks.ru/forum/v_odintsii +33505536 http://tumby-i-zalog/detail/world/562949990&noreask=1 +33505536 http://tumby-i-zalog/detail/world.ru/yandex.ru/ecobal +33505536 http://tumby-i-zalog/detail/womenskaya.net/football/ukraine +33505536 http://tumby-i-sydney.yandsearch +33505536 http://tumby-i-sydney +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja 
+33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 
http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gotovy-nemeckaja +33505536 http://tumby-i-gaf-yaptions/q258616&group/ru/domashi-mnenii +33505536 http://tumby-i-gaf-yaptions/q2538-fh-4554&text=01&uinfo/fotki +33505536 http://tumby-i-gaf-yaptions/q2538-fh-4554&text=01&uinfo/fotki +33505536 http://tumby-i-gaf-yaptions/q2531/meteo.ua/cat-maski.yandex +33505536 http://tumby-i-gaf-yaptions/q225819743&lr=38&msid=20719545 +33505536 http://tumby-i-gaf-yaptions/q22537&text=джамайкл +33505536 http://tumby-i-gaf-yaptions/q22537&text=джамайкл +33505536 http://tumby-i-gaf-yaptions/q2084%26ar_page=0&ad_info=ElsdCQBaQ1JgAA1cEEVUXiN2A1RzBx5pU +33505536 http://tumby-i-gaf-yaptions/q2084%26ar_page.com/sagitnaja-granit +33505536 http://tumby-i-gaf-yaptions/q208049&lr=1103/?from]=2508&os +33505536 http://tumby-i-gaf-yaptions/q208&lr=14&msid=22892.1140&domain +33505536 http://tumby-i-gaf-yaptions/q208&group_id=66289.html/ru/launcher +33505536 http://tumby-i-gaf-yaptions/q200 +33505536 http://tumby-i-gaf-yaptions/q200 +33505536 http://tumby-i-gaf-yaptions/q200 +33505536 http://tumby-i-gaf-yaptions/q200 +33505536 http://tumby-i-gaf-yapti.ru/video/online +33505536 http://tumby-i-gaf-yapti.ru/politics/news +33505536 http://tumby-i-gaf-yapti.ru/politics/17/12/2495.html/ru/Politics +33505536 http://tumby-i-gaf-yapti.ru/inregion_type +33505536 http://tumby-i-gaf-yapti.ru/index.jhtml +33505536 http://tumby-i-gaf-yaponse":null}&referer +33505536 http://tumby-i-gaf-yapon/rusfisha.yandex.ru/inworld%2F300628431.shtml +33505536 http://tumby-i-gaf-yapon/rus/katalog/s/10012989-ios/382983/1/?h=f3ea +33505536 http://tumby-i-gaf-yapon/rukove-wallpaper.ru/yandex.ru/news/news +33505536 http://tumby-i-gaf-yapon/rukopasnoo.com/beconomi/monstructure.ru +33505536 http://tumby-i-gaf-yapon/rukol/3444.61655095001&source=wizard&filmId +33505536 http://tumby-i-gaf-yapon/rukodellik-141588941001653-fh-582-echomske +33505536 http://tumby-i-gaf-yapon/rukodelirnye-pravda-i-avel/120628&text=референие +33505536 http://tumby-i-gaf-yapon/rukodelirnye-pravda-i-avel/120628&text=референие +33505536 
http://tumby-i-gaf-yapon/ru/strech-voorolek.com/iframe/frm_index +33505536 http://tumby-i-gaf-yapon/ru/steko.ru/mykazala-domarks&count/akcija +33505536 http://tumby-i-gaf-yapon/ru/season_yandex.ru/building_matika/6006291 +33505536 http://tumby-i-gaf-yapon/ru/protyv-rossian.ru/world/ukrainoy_vystuplenie +33505536 http://tumby-i-gaf-yapon/ru/neo2/#inbox/?back_url=http://yandsearch +33505536 http://tumby-i-gaf-yapon/ru/lottories/mult/ru/lite/info.ru/yandsearch +33505536 http://tumby-i-gaf-yapon/ru/launcher.html#post88580657429649.htm +33505536 http://tumby-i-gaf-yapon/relax.ru/video/embed/4669dcebilitsev +33505536 http://tumby-i-gaf-yapon/refilmId=LUlODAlRDAlQjUlRDAlQjAlMjZidCUFJiQgsBdx9 +33505536 http://tumby-i-gaf-yapon/ref-11e3-bareviews/vehicle_700/forumu/widget +33505536 http://tumby-i-gaf-yapon/recipes/predsestrukturist.html_parhomchyk +33505536 http://tumby-i-gaf-yapon/realtyim-ne-andz.tv/adwidget +33505536 http://tumby-i-gaf-yapon/realty.ru/studies/catalog.ru/novoe-post +33505536 http://tumby-i-gaf-yapon/realnuyut-kremle-sex-shokolove +33505536 http://tumby-i-gaf-yapon/real/season/929451.1395343110877/?actions +33505536 http://tumby-i-gaf-yapon/reads/vinti-products/view.html?id=79323 +33505536 http://tumby-i-gaf-yapon/readme.ru/free?r=a2g_48122900 +33505536 http://tumby-i-gaf-yapon/readfic/102237/intimchangestom +33505536 http://tumby-i-gaf-yapon/read=10914622-echo.msk.ru/news.mailbox.ru +33505536 http://tumby-i-gaf-yapon/read/upravda.com/galimat.com/iframe +33505536 http://tumby-i-gaf-yapon/read/3474.html?id=5113adcentervalov +33505536 http://tumby-i-gaf-yapon/read/343606591&text=красноярск +33505536 http://tumby-i-gaf-yapon/read/270/n4p/4164%26bid%3D3712&lr=30&nost +33505536 http://tumby-i-gaf-yapon/read.php?page=http://sozdatochet +33505536 http://tumby-i-gaf-yapon/read.php/Городавцова Наталь +33505536 http://tumby-i-gaf-yapon/read.adrive-arabota-cekiler/yurthaber +33505536 http://tumby-i-gaf-yapon/raznyh-orgirl/politics-torreviewtopic/286699416 +33505536 http://tumby-i-gaf-yapon/razmnozhi_i_set=googogoman.ru/product_7168 +33505536 http://tumby-i-gaf-yapon/razmnozhi_i_set=googogoman.ru/product_7168 +33505536 http://tumby-i-gaf-yapon/razmnozhi_i_set=googogoman.ru/product_7168 +33505536 http://tumby-i-gaf-yapon/razmnozhi_i_set=googogoman.ru/product_7168 +33505536 http://tumby-i-gaf-yapon/razmnozhi_i_set=googogoman.ru/product_7168 +33505536 http://tumby-i-gaf-yapon/razlozhi.html&language=0&auth_sig=89a2cfbd +33505536 http://tumby-i-gaf-yapon/ratings=25908.5847&text=вконтроль +33505536 http://tumby-i-gaf-yapon/rais-list?folder=0&new +33505536 http://tumby-i-gaf-yapon/rais-list?folder=0&new +33505536 http://tumby-i-gaf-yapon/rais-list?folder=0&new +33505536 http://tumby-i-gaf-yapon/rais-list?folder=0&new +33505536 http://tumby-i-gaf-yapon/raint-pelmenitoff.ru/user=1&auth.aspx?wt +33505536 http://tumby-i-gaf-yapon/raile.com/list?lang=ru&frame/?lang=ru&clid +33505536 http://tumby-i-gaf-yapon/rada.com.tr/displat.ru/aboutiquests/creative diff --git a/tests/queries/1_stateful/00181_cross_join_compression.sql b/tests/queries/1_stateful/00181_cross_join_compression.sql new file mode 100644 index 00000000000..2e08f1081e3 --- /dev/null +++ b/tests/queries/1_stateful/00181_cross_join_compression.sql @@ -0,0 +1,3 @@ +CREATE VIEW unit AS (SELECT 1); + +SELECT CounterID, StartURL FROM unit, test.visits ORDER BY (CounterID, StartURL) DESC LIMIT 1000; \ No newline at end of file diff --git a/utils/antlr/ClickHouseParser.g4 b/utils/antlr/ClickHouseParser.g4 index 13194a8c2d2..30e1cd10f8f 100644 --- 
a/utils/antlr/ClickHouseParser.g4 +++ b/utils/antlr/ClickHouseParser.g4 @@ -218,9 +218,17 @@ insertStmt: INSERT INTO TABLE? (tableIdentifier | FUNCTION tableFunctionExpr) co columnsClause: LPAREN nestedIdentifier (COMMA nestedIdentifier)* RPAREN; dataClause - : FORMAT identifier # DataClauseFormat - | VALUES # DataClauseValues - | selectUnionStmt SEMICOLON? EOF # DataClauseSelect + : FORMAT identifier # DataClauseFormat + | VALUES assignmentValues (COMMA assignmentValues)* # DataClauseValues + | selectUnionStmt SEMICOLON? EOF # DataClauseSelect + ; + +assignmentValues + : LPAREN assignmentValue (COMMA assignmentValue)* RPAREN + | LPAREN RPAREN + ; +assignmentValue + : literal ; // KILL statement diff --git a/utils/c++expr b/utils/c++expr index c70a4c7d382..8cf5d3a3b16 100755 --- a/utils/c++expr +++ b/utils/c++expr @@ -7,11 +7,13 @@ USAGE: c++expr [-c CXX | -C | -I] [-i INCLUDE] [-l LIB] [-b STEPS] [-t TESTS] [- OPTIONS: -c CXX use specified c++ compiler -C use cmake + -k keep generated worktree -I integrate into ClickHouse build tree in current directory -i INC add #include -l LIB link against LIB (only for -I or -C) -b STEPS_NUM make program to benchmark specified code snippet and run tests with STEPS_NUM each -b perf-top run infinite benchmark and show perf top + -B build-dir build directory for -I (default: "build") -t TESTS_NUM make program to benchmark specified code snippet and run TESTS_NUM tests -o FILE do not run, just save binary executable file -O CXX_OPTS forward option compiler (e.g. -O "-O3 -std=c++20") @@ -37,6 +39,7 @@ GLOBAL= OUTPUT_EXECUTABLE= INCS="vector iostream typeinfo cstdlib cmath sys/time.h" LIBS="" +BUILD_DIR=build BENCHMARK_STEPS=0 RUN_PERFTOP= BENCHMARK_TESTS=5 @@ -45,13 +48,14 @@ USE_CLICKHOUSE= CXX=g++ CXX_OPTS= CMD_PARAMS= +KEEP_WORKTREE=0 # # Parse command line # if [ "$1" == "--help" ] || [ -z "$1" ]; then usage; fi -while getopts "vc:CIi:l:b:t:o:O:g:" OPT; do +while getopts "vc:CIi:l:bkB:t:o:O:g:" OPT; do case "$OPT" in v) set -x; ;; c) CXX="$OPTARG"; ;; @@ -60,6 +64,8 @@ while getopts "vc:CIi:l:b:t:o:O:g:" OPT; do i) INCS="$INCS $OPTARG"; ;; l) LIBS="$LIBS $OPTARG"; ;; b) if [ "$OPTARG" = perf-top ]; then BENCHMARK_STEPS=-1; RUN_PERFTOP=y; else BENCHMARK_STEPS="$OPTARG"; fi; ;; + B) BUILD_DIR="$OPTARG"; ;; + k) KEEP_WORKTREE=1; ;; t) BENCHMARK_TESTS="$OPTARG"; ;; o) OUTPUT_EXECUTABLE="$OPTARG"; ;; O) CXX_OPTS="$CXX_OPTS $OPTARG"; ;; @@ -110,11 +116,11 @@ find_clickhouse_root () { find_clickhouse_build () { local CLICKHOUSE_ROOT="`find_clickhouse_root`" - if [ -e "$CLICKHOUSE_ROOT/build/CMakeCache.txt" ]; then - echo "$CLICKHOUSE_ROOT/build" + if [ -e "$CLICKHOUSE_ROOT/$BUILD_DIR/CMakeCache.txt" ]; then + echo "$CLICKHOUSE_ROOT/$BUILD_DIR" return 0 fi - echo "error: $CLICKHOUSE_ROOT/build/CMakeCache.txt doesn't exist" + echo "error: $CLICKHOUSE_ROOT/$BUILD_DIR/CMakeCache.txt doesn't exist" return 1 } @@ -144,13 +150,17 @@ if [ -n "$USE_CLICKHOUSE" ]; then echo "add_subdirectory ($SUBDIR)" >>$CALL_DIR/CMakeLists.txt cleanup() { mv $CALL_DIR/CMakeLists.txt.backup.$$ $CALL_DIR/CMakeLists.txt - rm -rf $WORKDIR - rm -rf ${BUILD_ROOT}${CLICKHOUSE_PATH} + if [ $KEEP_WORKTREE -eq 0 ]; then + rm -rf $WORKDIR + rm -rf ${BUILD_ROOT}${CLICKHOUSE_PATH} + fi } else WORKDIR=/var/tmp/cppexpr_$$ cleanup() { - rm -rf $WORKDIR + if [ $KEEP_WORKTREE -eq 0 ]; then + rm -rf $WORKDIR + fi } fi diff --git a/utils/changelog/README.md b/utils/changelog/README.md index ccc235c4990..4b16c39a3fe 100644 --- a/utils/changelog/README.md +++ b/utils/changelog/README.md @@ -6,7 +6,7 @@ 
Generate github token: Dependencies: ``` sudo apt-get update -sudo apt-get install git python3 python3-fuzzywuzzy python3-github +sudo apt-get install git python3 python3-thefuzz python3-github python3 changelog.py -h ``` @@ -15,10 +15,7 @@ Usage example: Note: The working directory is ClickHouse/utils/changelog ```bash -export GITHUB_TOKEN="" - -git fetch --tags # changelog.py depends on having the tags available, this will fetch them. - # If you are working from a branch in your personal fork, then you may need `git fetch --all` +GITHUB_TOKEN="" python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$GITHUB_TOKEN" v21.6.2.7-prestable python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$USER" --gh-password="$PASSWORD" v21.6.2.7-prestable diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index 6b70952eced..314461a6b3a 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -3,18 +3,20 @@ import argparse import logging -import os.path as p import os +import os.path as p import re from datetime import date, timedelta -from subprocess import CalledProcessError, DEVNULL +from subprocess import DEVNULL, CalledProcessError from typing import Dict, List, Optional, TextIO -from fuzzywuzzy.fuzz import ratio # type: ignore -from github_helper import GitHub, PullRequest, PullRequests, Repository from github.GithubException import RateLimitExceededException, UnknownObjectException from github.NamedUser import NamedUser -from git_helper import is_shallow, git_runner as runner +from thefuzz.fuzz import ratio # type: ignore + +from git_helper import git_runner as runner +from git_helper import is_shallow +from github_helper import GitHub, PullRequest, PullRequests, Repository # This array gives the preferred category order, and is also used to # normalize category names. @@ -25,6 +27,7 @@ categories_preferred_order = ( "New Feature", "Performance Improvement", "Improvement", + "Critical Bug Fix", "Bug Fix", "Build/Testing/Packaging Improvement", "Other", @@ -57,9 +60,10 @@ class Description: self.entry, ) # 2) issue URL w/o markdown link + # including #issuecomment-1 or #event-12 entry = re.sub( - r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + r"([^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)", + r"\1[#\3](\2)", entry, ) # It's possible that we face a secondary rate limit. 
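For illustration, here is a minimal sketch of what the updated substitution in changelog.py does, using a made-up changelog entry (the pattern and replacement are copied verbatim from the hunk above; the sample string and issue number are hypothetical): a bare issue URL, now including `#issuecomment-...` or `#event-...` suffixes, is rewritten into a markdown link whose text is the issue number and whose target keeps the full URL.

```python
import re

# Made-up entry for illustration; pattern and replacement are taken from the
# substitution added in changelog.py above.
entry = "Fixes https://github.com/ClickHouse/ClickHouse/issues/12345#issuecomment-1 in the planner"

linked = re.sub(
    r"([^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)",
    r"\1[#\3](\2)",  # \3 = issue number for the link text, \2 = full URL (anchor included)
    entry,
)

# -> "Fixes [#12345](https://github.com/ClickHouse/ClickHouse/issues/12345#issuecomment-1) in the planner"
print(linked)
```

Capturing the whole URL as group 2, instead of rebuilding it from the issue number as the old pattern did, is what lets the `#issuecomment-...` anchor survive in the link target.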
@@ -96,23 +100,20 @@ def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]: # obj._rawData doesn't spend additional API requests # We'll save some requests # pylint: disable=protected-access - repo_name = pr._rawData["base"]["repo"]["full_name"] # type: ignore + repo_name = pr._rawData["base"]["repo"]["full_name"] # pylint: enable=protected-access if repo_name not in repos: repos[repo_name] = pr.base.repo in_changelog = False merge_commit = pr.merge_commit_sha - try: - runner.run(f"git rev-parse '{merge_commit}'") - except CalledProcessError: - # It's possible that commit not in the repo, just continue - logging.info("PR %s does not belong to the repo", pr.number) + if merge_commit is None: + logging.warning("PR %s does not have merge-commit, skipping", pr.number) continue in_changelog = merge_commit in SHA_IN_CHANGELOG if in_changelog: desc = generate_description(pr, repos[repo_name]) - if desc is not None: + if desc: if desc.category not in descriptions: descriptions[desc.category] = [] descriptions[desc.category].append(desc) @@ -187,7 +188,7 @@ def parse_args() -> argparse.Namespace: # This function mirrors the PR description checks in ClickhousePullRequestTrigger. -# Returns False if the PR should not be mentioned changelog. +# Returns None if the PR should not be mentioned in changelog. def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]: backport_number = item.number if item.head.ref.startswith("backport/"): @@ -270,7 +271,6 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri category, ): category = "Bug Fix (user-visible misbehavior in an official stable release)" - return Description(item.number, item.user, item.html_url, item.title, category) # Filter out documentations changelog if re.match( @@ -299,8 +299,9 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri return Description(item.number, item.user, item.html_url, entry, category) -def write_changelog(fd: TextIO, descriptions: Dict[str, List[Description]]): - year = date.today().year +def write_changelog( + fd: TextIO, descriptions: Dict[str, List[Description]], year: int +) -> None: to_commit = runner(f"git rev-parse {TO_REF}^{{}}")[:11] from_commit = runner(f"git rev-parse {FROM_REF}^{{}}")[:11] fd.write( @@ -358,6 +359,12 @@ def set_sha_in_changelog(): ).split("\n") +def get_year(prs: PullRequests) -> int: + if not prs: + return date.today().year + return max(pr.created_at.year for pr in prs) + + def main(): log_levels = [logging.WARN, logging.INFO, logging.DEBUG] args = parse_args() @@ -411,8 +418,9 @@ def main(): prs = gh.get_pulls_from_search(query=query, merged=merged, sort="created") descriptions = get_descriptions(prs) + changelog_year = get_year(prs) - write_changelog(args.output, descriptions) + write_changelog(args.output, descriptions, changelog_year) if __name__ == "__main__": diff --git a/utils/changelog/requirements.txt b/utils/changelog/requirements.txt index 106e9e2c72d..53c3bf3206e 100644 --- a/utils/changelog/requirements.txt +++ b/utils/changelog/requirements.txt @@ -1,3 +1,2 @@ -fuzzywuzzy +thefuzz PyGitHub -python-Levenshtein diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 1457eacbc92..c35e860a5d7 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -6,6 +6,7 @@ AMPLab AMQP ANNIndex ANNIndexes +ANOVA AORM APIs ARMv @@ -29,13 +30,6 @@ Alexey 
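> Aside on the `fuzzywuzzy` → `thefuzz` switch above (changelog.py import, README dependency, and requirements.txt): `thefuzz` is the maintained successor package and keeps the same `fuzz.ratio` call signature, so the fuzzy matching in changelog.py is unchanged. A minimal sanity check, assuming `thefuzz` is installed per the updated requirements:

```python
from thefuzz.fuzz import ratio  # drop-in replacement for fuzzywuzzy.fuzz.ratio

# Similarity score in the 0..100 range, as used by changelog.py for fuzzy matching.
print(ratio("Fix race condition in MergeTree", "Fix race condition in MergeTree parts"))
```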
AnyEvent AppleClang Approximative -arrayDotProduct -arrayEnumerateDenseRanked -arrayEnumerateUniqRanked -arrayFirstOrNull -arrayLastOrNull -arrayPartialShuffle -arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize @@ -184,7 +178,6 @@ ComplexKeyCache ComplexKeyDirect ComplexKeyHashed Composable -composable Config ConnectionDetails Const @@ -453,6 +446,7 @@ KafkaLibrdkafkaThreads KafkaProducers KafkaWrites Kahan +Kaser KeeperAliveConnections KeeperMap KeeperOutstandingRequets @@ -462,8 +456,6 @@ KittenHouse Klickhouse Kolmogorov Konstantin -kostik -kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -474,6 +466,8 @@ LLVM's LOCALTIME LOCALTIMESTAMP LONGLONG +LOONGARCH +Lemir Levenshtein Liao LibFuzzer @@ -491,6 +485,7 @@ LocalThreadActive LogQL Logstash LookML +LoongArch LowCardinality LpDistance LpNorm @@ -565,17 +560,6 @@ MindsDB Mongodb Monotonicity MsgPack -multiSearchAllPositionsCaseInsensitive -multiSearchAllPositionsCaseInsensitiveUTF -multiSearchAnyCaseInsensitive -multiSearchAnyCaseInsensitiveUTF -multiSearchAnyUTF -multiSearchFirstIndexCaseInsensitive -multiSearchFirstIndexCaseInsensitiveUTF -multiSearchFirstIndexUTF -multiSearchFirstPositionCaseInsensitive -multiSearchFirstPositionCaseInsensitiveUTF -multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries @@ -677,8 +661,8 @@ OSUserTimeNormalized OTLP OUTFILE ObjectId -Observability Oblakov +Observability Octonica Ok OnTime @@ -924,7 +908,6 @@ TAVG TCPConnection TCPThreads TDigest -ThreadMonotonic TINYINT TLSv TMAX @@ -950,7 +933,6 @@ TablesLoaderForegroundThreads TablesLoaderForegroundThreadsActive TablesToDropQueueSize TargetSpecific -tanh Telegraf TemplateIgnoreSpaces TemporaryFilesForAggregation @@ -960,6 +942,7 @@ TemporaryFilesUnknown Testflows Tgz Theil's +ThreadMonotonic ThreadPoolFSReaderThreads ThreadPoolFSReaderThreadsActive ThreadPoolRemoteFSReaderThreads @@ -1002,11 +985,13 @@ UMTS UNDROP UPDATEs URIs +URL URL's URLHash URLHierarchy URLPathHierarchy USearch +UTCTimestamp UUIDNumToString UUIDStringToNum UUIDToNum @@ -1019,7 +1004,6 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unshuffled Updatable Uppercased Uptime @@ -1086,6 +1070,7 @@ activerecord addDate addDays addHours +addInterval addMicroseconds addMilliseconds addMinutes @@ -1093,6 +1078,7 @@ addMonths addNanoseconds addQuarters addSeconds +addTupleOfIntervals addWeeks addYears addr @@ -1109,6 +1095,7 @@ aiochclient allocator alphaTokens amplab +analysisOfVariance analytics anonymize anonymized @@ -1136,15 +1123,19 @@ arrayCumSum arrayCumSumNonNegative arrayDifference arrayDistinct +arrayDotProduct arrayElement arrayEnumerate arrayEnumerateDense +arrayEnumerateDenseRanked arrayEnumerateUniq +arrayEnumerateUniqRanked arrayExists arrayFill arrayFilter arrayFirst arrayFirstIndex +arrayFirstOrNull arrayFlatten arrayFold arrayIntersect @@ -1152,10 +1143,12 @@ arrayJaccardIndex arrayJoin arrayLast arrayLastIndex +arrayLastOrNull arrayMap arrayMax arrayMin arrayPartialReverseSort +arrayPartialShuffle arrayPartialSort arrayPopBack arrayPopFront @@ -1175,6 +1168,7 @@ arrayRotateRight arrayShiftLeft arrayShiftRight arrayShingles +arrayShuffle arraySlice arraySort arraySplit @@ -1356,6 +1350,7 @@ collapsingmergetree combinator combinators comparising +composable compressability concat concatAssumeInjective @@ -1365,6 +1360,7 @@ cond conf config configs +conformant congruential conjuction conjuctive @@ -1373,6 +1369,10 @@ const contrib convertCharset coroutines +corrMatrix +corrStable +corrmatrix +corrstable 
cosineDistance countDigits countEqual @@ -1382,10 +1382,19 @@ countSubstrings countSubstringsCaseInsensitive countSubstringsCaseInsensitiveUTF covarPop +covarPopMatrix +covarPopStable covarSamp +covarSampMatrix +covarSampStable +covarStable covariates covarpop +covarpopmatrix +covarpopstable covarsamp +covarsampmatrix +covarsampstable covid cpp cppkafka @@ -1421,8 +1430,12 @@ cutQueryString cutQueryStringAndFragment cutToFirstSignificantSubdomain cutToFirstSignificantSubdomainCustom +cutToFirstSignificantSubdomainCustomRFC cutToFirstSignificantSubdomainCustomWithWWW +cutToFirstSignificantSubdomainCustomWithWWWRFC +cutToFirstSignificantSubdomainRFC cutToFirstSignificantSubdomainWithWWW +cutToFirstSignificantSubdomainWithWWWRFC cutURLParameter cutWWW cyrus @@ -1509,7 +1522,10 @@ displaySecretsInShowAndSelect distro divideDecimal dmesg +doesnt +domainRFC domainWithoutWWW +domainWithoutWWWRFC dont dotProduct downsampling @@ -1582,8 +1598,11 @@ filesystems finalizeAggregation fips firstLine +firstSignficantSubdomain firstSignificantSubdomain firstSignificantSubdomainCustom +firstSignificantSubdomainCustomRFC +firstSignificantSubdomainRFC fixedstring flamegraph flatbuffers @@ -1606,6 +1625,7 @@ formated formatschema formatter formatters +frac freezed fromDaysSinceYearZero fromModifiedJulianDay @@ -1625,6 +1645,7 @@ gcem generateRandom generateRandomStructure generateSeries +generateSnowflakeID generateULID generateUUIDv geoDistance @@ -1717,8 +1738,8 @@ hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF hasSubstr -hasToken hasThreadFuzzer +hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull @@ -1731,6 +1752,8 @@ hdfs hdfsCluster heredoc heredocs +hilbertDecode +hilbertEncode hiveHash holistics homebrew @@ -1791,8 +1814,10 @@ isIPAddressInRange isIPv isInfinite isNaN +isNotDistinctFrom isNotNull isNull +isNullable isValidJSON isValidUTF isZeroOrNull @@ -1844,6 +1869,8 @@ kolmogorovSmirnovTest kolmogorovsmirnovtest kolya konsole +kostik +kostikConsistentHash kurtPop kurtSamp kurtosis @@ -1855,9 +1882,9 @@ laravel largestTriangleThreeBuckets latencies ldap -leftUTF leftPad leftPadUTF +leftUTF lemmatization lemmatize lemmatized @@ -1905,6 +1932,7 @@ logagent loghouse london lookups +loongarch lowcardinality lowerUTF lowercased @@ -1949,6 +1977,8 @@ mdadm meanZTest meanztest mebibytes +memtable +memtables mergeTreeIndex mergeable mergetree @@ -1973,8 +2003,8 @@ mispredictions mmap mmapped modularization -moduloOrZero moduli +moduloOrZero mongodb monotonicity monthName @@ -1991,10 +2021,21 @@ multiMatchAllIndices multiMatchAny multiMatchAnyIndex multiSearchAllPositions +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF multiSearchAllPositionsUTF multiSearchAny +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF multiSearchFirstIndex +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF multiSearchFirstPosition +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF +multiSearchFirstPositionUTF multibyte multidirectory multiline @@ -2080,6 +2121,7 @@ ok omclickhouse onstraints ontime +onwards openSSL openSUSE openldap @@ -2110,6 +2152,9 @@ parseDateTimeInJodaSyntaxOrNull parseDateTimeInJodaSyntaxOrZero parseDateTimeOrNull parseDateTimeOrZero +parseReadableSize +parseReadableSizeOrNull +parseReadableSizeOrZero parseTimeDelta parseable parsers @@ -2143,6 +2188,7 @@ polygonsUnionSpherical 
polygonsWithinCartesian polygonsWithinSpherical popcnt +portRFC porthttps positionCaseInsensitive positionCaseInsensitiveUTF @@ -2191,6 +2237,7 @@ procfs profiler proleptic prometheus +proportionsZTest proto protobuf protobufsingle @@ -2329,8 +2376,8 @@ retentions rethrow retransmit retriable -rewritable reverseUTF +rewritable rightPad rightPadUTF rightUTF @@ -2391,6 +2438,8 @@ sharding shortcircuit shortkeys shoutout +showCertificate +sigmoid simdjson simpleJSON simpleJSONExtractBool @@ -2404,7 +2453,9 @@ simpleLinearRegression simpleaggregatefunction simplelinearregression simpod +singleValueOrNull singlepart +singlevalueornull sinh sipHash siphash @@ -2448,13 +2499,13 @@ statbox stateful stddev stddevPop -stddevSamp -stddevpop -stddevsamp -stddevpopstable stddevPopStable -stddevsampstable +stddevSamp stddevSampStable +stddevpop +stddevpopstable +stddevsamp +stddevsampstable stderr stdin stdout @@ -2515,6 +2566,7 @@ substrings subtitiles subtractDays subtractHours +subtractInterval subtractMicroseconds subtractMilliseconds subtractMinutes @@ -2522,6 +2574,7 @@ subtractMonths subtractNanoseconds subtractQuarters subtractSeconds +subtractTupleOfIntervals subtractWeeks subtractYears subtree @@ -2532,13 +2585,13 @@ sumCount sumKahan sumMap sumMapFiltered +sumMapFilteredWithOverflow +sumMapWithOverflow sumWithOverflow sumcount sumkahan summap summapwithoverflow -sumMapWithOverflow -sumMapFilteredWithOverflow summingmergetree sumwithoverflow superaggregates @@ -2561,6 +2614,7 @@ tabseparatedrawwithnames tabseparatedrawwithnamesandtypes tabseparatedwithnames tabseparatedwithnamesandtypes +tanh tcp tcpPort tcpnodelay @@ -2632,8 +2686,11 @@ toStartOfFiveMinutes toStartOfHour toStartOfISOYear toStartOfInterval +toStartOfMicrosecond +toStartOfMillisecond toStartOfMinute toStartOfMonth +toStartOfNanosecond toStartOfQuarter toStartOfSecond toStartOfTenMinutes @@ -2669,6 +2726,7 @@ toolset topK topKWeighted topLevelDomain +topLevelDomainRFC topk topkweighted transactional @@ -2695,18 +2753,18 @@ tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs -tupleIntDiv -tupleIntDivByNumber -tupleIntDivOrZero -tupleIntDivOrZeroByNumber -tupleModulo -tupleModuloByNumber turbostat txt typename @@ -2745,10 +2803,12 @@ unixODBC unixodbc unoptimized unparsed +unpooled unrealiable unreplicated unresolvable unrounded +unshuffled untracked untrusted untuple @@ -2760,6 +2820,7 @@ uptrace uring url urlCluster +urlencoded urls usearch userspace diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 23e8b6b2bc4..5c05907e9dd 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -290,8 +290,6 @@ std_cerr_cout_excludes=( /examples/ /tests/ _fuzzer - # DUMP() - base/base/iostream_debug_helpers.h # OK src/Common/ProgressIndication.cpp # only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 5514c34f4ef..4fe0d852fd2 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,5 +4,4 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE dbms) -target_link_libraries(keeper-bench PRIVATE 
ch_contrib::rapidjson) +target_link_libraries(keeper-bench PRIVATE dbms clickhouse_functions ch_contrib::rapidjson) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 2212f7158ae..cbf1bcdae23 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -40,54 +40,6 @@ std::string generateRandomString(size_t length) } } -void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) -{ - namespace fs = std::filesystem; - - auto promise = std::make_shared>(); - auto future = promise->get_future(); - - Strings children; - auto list_callback = [promise, &children] (const ListResponse & response) - { - children = response.names; - - promise->set_value(); - }; - zookeeper.list(path, ListRequestType::ALL, list_callback, nullptr); - future.get(); - - while (!children.empty()) - { - Coordination::Requests ops; - for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i) - { - removeRecursive(zookeeper, fs::path(path) / children.back()); - ops.emplace_back(makeRemoveRequest(fs::path(path) / children.back(), -1)); - children.pop_back(); - } - auto multi_promise = std::make_shared>(); - auto multi_future = multi_promise->get_future(); - - auto multi_callback = [multi_promise] (const MultiResponse &) - { - multi_promise->set_value(); - }; - zookeeper.multi(ops, multi_callback); - multi_future.get(); - } - auto remove_promise = std::make_shared>(); - auto remove_future = remove_promise->get_future(); - - auto remove_callback = [remove_promise] (const RemoveResponse &) - { - remove_promise->set_value(); - }; - - zookeeper.remove(path, -1, remove_callback); - remove_future.get(); -} - NumberGetter NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { @@ -603,148 +555,16 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) acl.id = "anyone"; default_acls.emplace_back(std::move(acl)); - static const std::string generator_key = "generator"; - - std::cerr << "---- Parsing setup ---- " << std::endl; - static const std::string setup_key = generator_key + ".setup"; - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(setup_key, keys); - for (const auto & key : keys) - { - if (key.starts_with("node")) - { - auto node_key = setup_key + "." 
+ key; - auto parsed_root_node = parseNode(node_key, config); - const auto node = root_nodes.emplace_back(parsed_root_node); - - if (config.has(node_key + ".repeat")) - { - if (!node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); - - auto repeat_count = config.getUInt64(node_key + ".repeat"); - node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - root_nodes.emplace_back(node->clone()); - } - - std::cerr << "Tree to create:" << std::endl; - - node->dumpTree(); - std::cerr << std::endl; - } - } - std::cerr << "---- Done parsing data setup ----\n" << std::endl; - std::cerr << "---- Collecting request generators ----" << std::endl; - static const std::string requests_key = generator_key + ".requests"; + static const std::string requests_key = "generator.requests"; request_getter = RequestGetter::fromConfig(requests_key, config); std::cerr << request_getter.description() << std::endl; std::cerr << "---- Done collecting request generators ----\n" << std::endl; } -std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) -{ - auto node = std::make_shared(); - node->name = StringGetter::fromConfig(key + ".name", config); - - if (config.has(key + ".data")) - node->data = StringGetter::fromConfig(key + ".data", config); - - Poco::Util::AbstractConfiguration::Keys node_keys; - config.keys(key, node_keys); - - for (const auto & node_key : node_keys) - { - if (!node_key.starts_with("node")) - continue; - - const auto node_key_string = key + "." + node_key; - auto child_node = parseNode(node_key_string, config); - node->children.push_back(child_node); - - if (config.has(node_key_string + ".repeat")) - { - if (!child_node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); - - auto repeat_count = config.getUInt64(node_key_string + ".repeat"); - child_node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - node->children.push_back(child_node); - } - } - - return node; -} - -void Generator::Node::dumpTree(int level) const -{ - std::string data_string - = data.has_value() ? fmt::format("{}", data->description()) : "no data"; - - std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; - - std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; - - for (auto it = children.begin(); it != children.end();) - { - const auto & child = *it; - child->dumpTree(level + 1); - std::advance(it, child->repeat_count != 0 ? 
child->repeat_count : 1); - } -} - -std::shared_ptr Generator::Node::clone() const -{ - auto new_node = std::make_shared(); - new_node->name = name; - new_node->data = data; - new_node->repeat_count = repeat_count; - - // don't do deep copy of children because we will do clone only for root nodes - new_node->children = children; - - return new_node; -} - -void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const -{ - auto path = std::filesystem::path(parent_path) / name.getString(); - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path, data ? data->getString() : "", false, false, acls, create_callback); - future.get(); - - for (const auto & child : children) - child->createNode(zookeeper, path, acls); -} - void Generator::startup(Coordination::ZooKeeper & zookeeper) { - std::cerr << "---- Creating test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - node->name.setString(node_name); - - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - - node->createNode(zookeeper, "/", default_acls); - } - std::cerr << "---- Created test data ----\n" << std::endl; - std::cerr << "---- Initializing generators ----" << std::endl; - request_getter.startup(zookeeper); } @@ -752,15 +572,3 @@ Coordination::ZooKeeperRequestPtr Generator::generate() { return request_getter.getRequestGenerator()->generate(default_acls); } - -void Generator::cleanup(Coordination::ZooKeeper & zookeeper) -{ - std::cerr << "---- Cleaning up test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - } -} diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 5b4c05b2d8b..35dce1a95d9 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -173,27 +173,9 @@ public: void startup(Coordination::ZooKeeper & zookeeper); Coordination::ZooKeeperRequestPtr generate(); - void cleanup(Coordination::ZooKeeper & zookeeper); private: - struct Node - { - StringGetter name; - std::optional data; - std::vector> children; - size_t repeat_count = 0; - - std::shared_ptr clone() const; - - void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; - void dumpTree(int level = 0) const; - }; - - static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); std::uniform_int_distribution request_picker; - std::vector> root_nodes; RequestGetter request_getter; Coordination::ACLs default_acls; }; - -std::optional getGenerator(const std::string & name); diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a4b579f1f7b..ed7e09685f0 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,14 +1,31 @@ #include "Runner.h" +#include #include -#include 
"Common/ZooKeeper/ZooKeeperCommon.h" -#include "Common/ZooKeeper/ZooKeeperConstants.h" -#include -#include -#include "IO/ReadBufferFromString.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace CurrentMetrics @@ -22,23 +39,43 @@ namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } Runner::Runner( std::optional concurrency_, const std::string & config_path, + const std::string & input_request_log_, + const std::string & setup_nodes_snapshot_path_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, std::optional continue_on_error_, std::optional max_iterations_) - : info(std::make_shared()) + : input_request_log(input_request_log_) + , setup_nodes_snapshot_path(setup_nodes_snapshot_path_) + , info(std::make_shared()) { DB::ConfigProcessor config_processor(config_path, true, false); - auto config = config_processor.loadConfig().configuration; + DB::ConfigurationPtr config = nullptr; + + if (!config_path.empty()) + { + config = config_processor.loadConfig().configuration; + + if (config->has("generator")) + generator.emplace(*config); + } + else + { + if (input_request_log.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both --config and --input_request_log cannot be empty"); + + if (!std::filesystem::exists(input_request_log)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "File on path {} does not exist", input_request_log); + } - generator.emplace(*config); if (!hosts_strings_.empty()) { @@ -57,6 +94,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_CONCURRENCY = 1; if (concurrency_) concurrency = *concurrency_; + else if (!config) + concurrency = DEFAULT_CONCURRENCY; else concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); std::cerr << "Concurrency: " << concurrency << std::endl; @@ -64,6 +103,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_ITERATIONS = 0; if (max_iterations_) max_iterations = *max_iterations_; + else if (!config) + max_iterations = DEFAULT_ITERATIONS; else max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); std::cerr << "Iterations: " << max_iterations << std::endl; @@ -71,6 +112,8 @@ Runner::Runner( static constexpr double DEFAULT_DELAY = 1.0; if (delay_) delay = *delay_; + else if (!config) + delay = DEFAULT_DELAY; else delay = config->getDouble("report_delay", DEFAULT_DELAY); std::cerr << "Report delay: " << delay << std::endl; @@ -78,44 +121,48 @@ Runner::Runner( static constexpr double DEFAULT_TIME_LIMIT = 0.0; if (max_time_) max_time = *max_time_; + else if (!config) + max_time = DEFAULT_TIME_LIMIT; else max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); std::cerr << "Time limit: " << max_time << std::endl; if (continue_on_error_) continue_on_error = *continue_on_error_; + else if (!config) + continue_on_error_ = false; else continue_on_error = config->getBool("continue_on_error", false); std::cerr << "Continue on error: " << continue_on_error << std::endl; - static const std::string output_key = "output"; - print_to_stdout = config->getBool(output_key + ".stdout", false); - std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; - - static const std::string output_file_key = output_key + ".file"; - if (config->has(output_file_key)) + if (config) { - if 
(config->has(output_file_key + ".path")) - { - file_output = config->getString(output_file_key + ".path"); - output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); - } - else - file_output = config->getString(output_file_key); + benchmark_context.initializeFromConfig(*config); - std::cerr << "Result file path: " << file_output->string() << std::endl; + static const std::string output_key = "output"; + print_to_stdout = config->getBool(output_key + ".stdout", false); + std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; + + static const std::string output_file_key = output_key + ".file"; + if (config->has(output_file_key)) + { + if (config->has(output_file_key + ".path")) + { + file_output = config->getString(output_file_key + ".path"); + output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); + } + else + file_output = config->getString(output_file_key); + + std::cerr << "Result file path: " << file_output->string() << std::endl; + } } std::cerr << "---- Run options ----\n" << std::endl; - - pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); - queue.emplace(concurrency); } void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) { - ConnectionInfo default_connection_info; - const auto fill_connection_details = [&](const std::string & key, auto & connection_info) { if (config.has(key + ".secure")) @@ -328,9 +375,770 @@ bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && re void Runner::runBenchmark() { + if (generator) + runBenchmarkWithGenerator(); + else + runBenchmarkFromLog(); +} + + +struct ZooKeeperRequestBlock +{ + explicit ZooKeeperRequestBlock(DB::Block block_) + : block(std::move(block_)) + , hostname_idx(block.getPositionByName("hostname")) + , request_event_time_idx(block.getPositionByName("request_event_time")) + , thread_id_idx(block.getPositionByName("thread_id")) + , session_id_idx(block.getPositionByName("session_id")) + , xid_idx(block.getPositionByName("xid")) + , has_watch_idx(block.getPositionByName("has_watch")) + , op_num_idx(block.getPositionByName("op_num")) + , path_idx(block.getPositionByName("path")) + , data_idx(block.getPositionByName("data")) + , is_ephemeral_idx(block.getPositionByName("is_ephemeral")) + , is_sequential_idx(block.getPositionByName("is_sequential")) + , response_event_time_idx(block.getPositionByName("response_event_time")) + , error_idx(block.getPositionByName("error")) + , requests_size_idx(block.getPositionByName("requests_size")) + , version_idx(block.getPositionByName("version")) + {} + + size_t rows() const + { + return block.rows(); + } + + UInt64 getExecutorId(size_t row) const + { + return getSessionId(row); + } + + std::string getHostname(size_t row) const + { + return getField(hostname_idx, row).safeGet(); + } + + UInt64 getThreadId(size_t row) const + { + return getField(thread_id_idx, row).safeGet(); + } + + DB::DateTime64 getRequestEventTime(size_t row) const + { + return getField(request_event_time_idx, row).safeGet(); + } + + DB::DateTime64 getResponseEventTime(size_t row) const + { + return getField(response_event_time_idx, row).safeGet(); + } + + Int64 getSessionId(size_t row) const + { + return getField(session_id_idx, row).safeGet(); + } + + Int64 getXid(size_t row) const + { + return getField(xid_idx, row).safeGet(); + } + + bool hasWatch(size_t row) const + { + return getField(has_watch_idx, row).safeGet(); + } + + 
Coordination::OpNum getOpNum(size_t row) const + { + return static_cast(getField(op_num_idx, row).safeGet()); + } + + bool isEphemeral(size_t row) const + { + return getField(is_ephemeral_idx, row).safeGet(); + } + + bool isSequential(size_t row) const + { + return getField(is_sequential_idx, row).safeGet(); + } + + std::string getPath(size_t row) const + { + return getField(path_idx, row).safeGet(); + } + + std::string getData(size_t row) const + { + return getField(data_idx, row).safeGet(); + } + + UInt64 getRequestsSize(size_t row) const + { + return getField(requests_size_idx, row).safeGet(); + } + + std::optional getVersion(size_t row) const + { + auto field = getField(version_idx, row); + if (field.isNull()) + return std::nullopt; + return static_cast(field.safeGet()); + } + + std::optional getError(size_t row) const + { + auto field = getField(error_idx, row); + if (field.isNull()) + return std::nullopt; + + return static_cast(field.safeGet()); + } +private: + DB::Field getField(size_t position, size_t row) const + { + DB::Field field; + block.getByPosition(position).column->get(row, field); + return field; + } + + DB::Block block; + size_t hostname_idx = 0; + size_t request_event_time_idx = 0; + size_t thread_id_idx = 0; + size_t session_id_idx = 0; + size_t xid_idx = 0; + size_t has_watch_idx = 0; + size_t op_num_idx = 0; + size_t path_idx = 0; + size_t data_idx = 0; + size_t is_ephemeral_idx = 0; + size_t is_sequential_idx = 0; + size_t response_event_time_idx = 0; + size_t error_idx = 0; + size_t requests_size_idx = 0; + size_t version_idx = 0; +}; + +struct RequestFromLog +{ + Coordination::ZooKeeperRequestPtr request; + std::optional expected_result; + std::vector> subrequest_expected_results; + int64_t session_id = 0; + size_t executor_id = 0; + bool has_watch = false; + DB::DateTime64 request_event_time; + DB::DateTime64 response_event_time; + std::shared_ptr connection; +}; + +struct ZooKeeperRequestFromLogReader +{ + ZooKeeperRequestFromLogReader(const std::string & input_request_log, DB::ContextPtr context) + { + std::optional format_settings; + + file_read_buf = std::make_unique(input_request_log); + auto compression_method = DB::chooseCompressionMethod(input_request_log, ""); + file_read_buf = DB::wrapReadBufferWithCompressionMethod(std::move(file_read_buf), compression_method); + + DB::SingleReadBufferIterator read_buffer_iterator(std::move(file_read_buf)); + auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + + DB::ColumnsWithTypeAndName columns; + columns.reserve(columns_description.size()); + + for (const auto & column_description : columns_description) + columns.push_back(DB::ColumnWithTypeAndName{column_description.type, column_description.name}); + + header_block = std::move(columns); + + file_read_buf + = DB::wrapReadBufferWithCompressionMethod(std::make_unique(input_request_log), compression_method); + + input_format = DB::FormatFactory::instance().getInput( + format, + *file_read_buf, + header_block, + context, + context->getSettingsRef().max_block_size, + format_settings, + 1, + std::nullopt, + /*is_remote_fs*/ false, + DB::CompressionMethod::None, + false); + + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + } + + std::optional getNextRequest(bool for_multi = false) + { + RequestFromLog request_from_log; + + if (!current_block) + { + auto chunk = input_format->generate(); + + if 
(chunk.empty()) + return std::nullopt; + + current_block.emplace(header_block.cloneWithColumns(chunk.detachColumns())); + idx_in_block = 0; + } + + request_from_log.expected_result = current_block->getError(idx_in_block); + request_from_log.session_id = current_block->getSessionId(idx_in_block); + request_from_log.has_watch = current_block->hasWatch(idx_in_block); + request_from_log.executor_id = current_block->getExecutorId(idx_in_block); + request_from_log.request_event_time = current_block->getRequestEventTime(idx_in_block); + request_from_log.response_event_time = current_block->getResponseEventTime(idx_in_block); + + const auto move_row_iterator = [&] + { + if (idx_in_block == current_block->rows() - 1) + current_block.reset(); + else + ++idx_in_block; + }; + + auto op_num = current_block->getOpNum(idx_in_block); + switch (op_num) + { + case Coordination::OpNum::Create: + { + auto create_request = std::make_shared(); + create_request->path = current_block->getPath(idx_in_block); + create_request->data = current_block->getData(idx_in_block); + create_request->is_ephemeral = current_block->isEphemeral(idx_in_block); + create_request->is_sequential = current_block->isSequential(idx_in_block); + request_from_log.request = create_request; + break; + } + case Coordination::OpNum::Set: + { + auto set_request = std::make_shared(); + set_request->path = current_block->getPath(idx_in_block); + set_request->data = current_block->getData(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + set_request->version = std::numeric_limits::max(); + } + request_from_log.request = set_request; + break; + } + case Coordination::OpNum::Remove: + { + auto remove_request = std::make_shared(); + remove_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + remove_request->version = std::numeric_limits::max(); + } + request_from_log.request = remove_request; + break; + } + case Coordination::OpNum::Check: + case Coordination::OpNum::CheckNotExists: + { + auto check_request = std::make_shared(); + check_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + check_request->version = std::numeric_limits::max(); + } + if (op_num == Coordination::OpNum::CheckNotExists) + check_request->not_exists = true; + request_from_log.request = check_request; + break; + } + case Coordination::OpNum::Sync: + { + auto sync_request = std::make_shared(); + sync_request->path = current_block->getPath(idx_in_block); + request_from_log.request = sync_request; + break; + } + case Coordination::OpNum::Get: + { + auto get_request = std::make_shared(); + get_request->path = current_block->getPath(idx_in_block); + request_from_log.request = get_request; + break; + } + case Coordination::OpNum::SimpleList: + case Coordination::OpNum::FilteredList: + { + auto list_request = std::make_shared(); + list_request->path = current_block->getPath(idx_in_block); + 
request_from_log.request = list_request; + break; + } + case Coordination::OpNum::Exists: + { + auto exists_request = std::make_shared(); + exists_request->path = current_block->getPath(idx_in_block); + request_from_log.request = exists_request; + break; + } + case Coordination::OpNum::Multi: + case Coordination::OpNum::MultiRead: + { + if (for_multi) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Nested multi requests are not allowed"); + + auto requests_size = current_block->getRequestsSize(idx_in_block); + + Coordination::Requests requests; + requests.reserve(requests_size); + move_row_iterator(); + + for (size_t i = 0; i < requests_size; ++i) + { + auto subrequest_from_log = getNextRequest(/*for_multi=*/true); + if (!subrequest_from_log) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to fetch subrequest for {}, subrequest index {}", op_num, i); + + if (!subrequest_from_log->expected_result && request_from_log.expected_result + && request_from_log.expected_result == Coordination::Error::ZOK) + { + subrequest_from_log->expected_result = Coordination::Error::ZOK; + } + + requests.push_back(std::move(subrequest_from_log->request)); + + if (subrequest_from_log->session_id != request_from_log.session_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Session id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + if (subrequest_from_log->executor_id != request_from_log.executor_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Executor id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + request_from_log.subrequest_expected_results.push_back(subrequest_from_log->expected_result); + } + + request_from_log.request = std::make_shared(requests, default_acls); + + return request_from_log; + } + default: + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unsupported operation {} ({})", op_num, static_cast(op_num)); + } + + move_row_iterator(); + + return request_from_log; + } + +private: + DB::Block header_block; + + std::unique_ptr file_read_buf; + DB::InputFormatPtr input_format; + + std::optional current_block; + size_t idx_in_block = 0; + + Coordination::ACLs default_acls; +}; + + +namespace +{ + +struct RequestFromLogStats +{ + struct Stats + { + std::atomic total = 0; + std::atomic unexpected_results = 0; + }; + + Stats write_requests; + Stats read_requests; +}; + +struct SetupNodeCollector +{ + explicit SetupNodeCollector(const std::string & setup_nodes_snapshot_path) + { + if (setup_nodes_snapshot_path.empty()) + return; + + keeper_context = std::make_shared(true, std::make_shared()); + keeper_context->setDigestEnabled(true); + keeper_context->setSnapshotDisk( + std::make_shared("Keeper-snapshots", setup_nodes_snapshot_path)); + + snapshot_manager.emplace(1, keeper_context); + auto snapshot_result = snapshot_manager->restoreFromLatestSnapshot(); + if (snapshot_result.storage == nullptr) + { + std::cerr << "No initial snapshot found" << std::endl; + initial_storage = std::make_unique( + /* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); + initial_storage->initializeSystemNodes(); + } + else + { + std::cerr << "Loaded initial nodes from snapshot" << std::endl; + initial_storage = std::move(snapshot_result.storage); + } + } + + void processRequest(const RequestFromLog & request_from_log) + { + if (!request_from_log.expected_result.has_value()) + return; + + + auto process_request = [&](const Coordination::ZooKeeperRequest & request, const auto expected_result) + { + const 
auto & path = request.getPath(); + + if (nodes_created_during_replay.contains(path)) + return; + + auto op_num = request.getOpNum(); + + if (op_num == Coordination::OpNum::Create) + { + if (expected_result == Coordination::Error::ZNODEEXISTS) + { + addExpectedNode(path); + } + else if (expected_result == Coordination::Error::ZOK) + { + nodes_created_during_replay.insert(path); + /// we need to make sure ancestors exist + auto position = path.find_last_of('/'); + if (position != 0) + { + auto parent_path = path.substr(0, position); + addExpectedNode(parent_path); + } + } + } + else if (op_num == Coordination::OpNum::Remove) + { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) + addExpectedNode(path); + } + else if (op_num == Coordination::OpNum::Set) + { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) + addExpectedNode(path); + } + else if (op_num == Coordination::OpNum::Check) + { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) + addExpectedNode(path); + } + else if (op_num == Coordination::OpNum::CheckNotExists) + { + if (expected_result == Coordination::Error::ZNODEEXISTS || expected_result == Coordination::Error::ZBADVERSION) + addExpectedNode(path); + } + else if (request.isReadRequest()) + { + if (expected_result == Coordination::Error::ZOK) + addExpectedNode(path); + } + }; + + const auto & request = request_from_log.request; + if (request->getOpNum() == Coordination::OpNum::Multi || request->getOpNum() == Coordination::OpNum::MultiRead) + { + const auto & multi_request = dynamic_cast(*request); + const auto & subrequests = multi_request.requests; + + for (size_t i = 0; i < subrequests.size(); ++i) + { + const auto & zookeeper_request = dynamic_cast(*subrequests[i]); + const auto subrequest_expected_result = request_from_log.subrequest_expected_results[i]; + if (subrequest_expected_result.has_value()) + process_request(zookeeper_request, *subrequest_expected_result); + + } + } + else + process_request(*request, *request_from_log.expected_result); + } + + void addExpectedNode(const std::string & path) + { + std::lock_guard lock(nodes_mutex); + + if (initial_storage->container.contains(path)) + return; + + new_nodes = true; + std::cerr << "Adding expected node " << path << std::endl; + + Coordination::Requests create_ops; + + size_t pos = 1; + while (true) + { + pos = path.find('/', pos); + if (pos == std::string::npos) + break; + + auto request = zkutil::makeCreateRequest(path.substr(0, pos), "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + ++pos; + } + + auto request = zkutil::makeCreateRequest(path, "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + + auto next_zxid = initial_storage->getNextZXID(); + + static Coordination::ACLs default_acls = [] + { + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + return Coordination::ACLs{std::move(acl)}; + }(); + + auto multi_create_request = std::make_shared(create_ops, default_acls); + initial_storage->preprocessRequest(multi_create_request, 1, 0, next_zxid, /* check_acl = */ false); + auto responses = initial_storage->processRequest(multi_create_request, 1, next_zxid, /* check_acl = */ false); + if (responses.size() > 1 || responses[0].response->error != Coordination::Error::ZOK) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Invalid 
response after trying to create a node {}", responses[0].response->error); + } + + void generateSnapshot() + { + std::lock_guard lock(nodes_mutex); + if (!new_nodes) + { + std::cerr << "No new nodes added" << std::endl; + return; + } + + std::cerr << "Generating snapshot with starting data" << std::endl; + DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); + DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); + snapshot_manager->serializeSnapshotToDisk(snapshot); + + new_nodes = false; + } + + std::mutex nodes_mutex; + DB::KeeperContextPtr keeper_context; + Coordination::KeeperStoragePtr initial_storage; + std::unordered_set nodes_created_during_replay; + std::optional snapshot_manager; + bool new_nodes = false; +}; + +void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) +{ + std::cerr << fmt::format( + "{} requests: {} total, {} with unexpected results ({:.4}%)", + type, + stats_for_type.total, + stats_for_type.unexpected_results, + stats_for_type.total != 0 ? static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100 : 0.0) + << std::endl; +}; + +void requestFromLogExecutor(std::shared_ptr> queue, RequestFromLogStats & request_stats) +{ + RequestFromLog request_from_log; + std::optional> last_request; + while (queue->pop(request_from_log)) + { + auto request_promise = std::make_shared>(); + last_request = request_promise->get_future(); + Coordination::ResponseCallback callback = [&, + request_promise, + request = request_from_log.request, + expected_result = request_from_log.expected_result, + subrequest_expected_results = std::move(request_from_log.subrequest_expected_results)]( + const Coordination::Response & response) mutable + { + auto & stats = request->isReadRequest() ? 
request_stats.read_requests : request_stats.write_requests; + + stats.total.fetch_add(1, std::memory_order_relaxed); + + if (expected_result) + { + if (*expected_result != response.error) + stats.unexpected_results.fetch_add(1, std::memory_order_relaxed); + +#if 0 + if (*expected_result != response.error) + { + std::cerr << fmt::format( + "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + << std::endl; + + if (const auto * multi_response = dynamic_cast(&response)) + { + std::string subresponses; + for (size_t i = 0; i < multi_response->responses.size(); ++i) + { + subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + } + + std::cerr << "Subresponses\n" << subresponses << std::endl; + } + } +#endif + } + + request_promise->set_value(); + }; + + Coordination::WatchCallbackPtr watch; + if (request_from_log.has_watch) + watch = std::make_shared([](const Coordination::WatchResponse &) {}); + + request_from_log.connection->executeGenericRequest(request_from_log.request, callback, watch); + } + + if (last_request) + last_request->wait(); +} + +} + +void Runner::runBenchmarkFromLog() +{ + std::cerr << fmt::format("Running benchmark using requests from {}", input_request_log) << std::endl; + + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); + + shared_context = DB::Context::createShared(); + global_context = DB::Context::createGlobal(shared_context.get()); + global_context->makeGlobalContext(); + DB::registerFormats(); + + /// Randomly choosing connection index + pcg64 rng(randomSeed()); + std::uniform_int_distribution connection_distribution(0, connection_infos.size() - 1); + + std::unordered_map> zookeeper_connections; + auto get_zookeeper_connection = [&](int64_t session_id) + { + if (auto it = zookeeper_connections.find(session_id); it != zookeeper_connections.end() && !it->second->isExpired()) + return it->second; + + auto connection_idx = connection_distribution(rng); + auto zk_connection = getConnection(connection_infos[connection_idx], connection_idx); + zookeeper_connections.insert_or_assign(session_id, zk_connection); + return zk_connection; + }; + + RequestFromLogStats stats; + + std::optional setup_nodes_collector; + if (!setup_nodes_snapshot_path.empty()) + setup_nodes_collector.emplace(setup_nodes_snapshot_path); + + std::unordered_map>> executor_id_to_queue; + + SCOPE_EXIT_SAFE({ + for (const auto & [executor_id, executor_queue] : executor_id_to_queue) + executor_queue->finish(); + + pool->wait(); + + + if (setup_nodes_collector) + { + setup_nodes_collector->generateSnapshot(); + } + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + } + }); + + auto push_request = [&](RequestFromLog request) + { + if (auto it = executor_id_to_queue.find(request.executor_id); it != executor_id_to_queue.end()) + { + auto success = it->second->push(std::move(request)); + if (!success) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); + return; + } + + auto executor_queue = std::make_shared>(std::numeric_limits::max()); + executor_id_to_queue.emplace(request.executor_id, executor_queue); + auto scheduled = pool->trySchedule([&, executor_queue]() mutable + { + requestFromLogExecutor(std::move(executor_queue), stats); + }); + + if (!scheduled) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to schedule worker, try to increase concurrency 
parameter"); + + auto success = executor_queue->push(std::move(request)); + if (!success) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); + }; + + if (!setup_nodes_collector) + { + auto setup_connection = getConnection(connection_infos[0], 0); + benchmark_context.startup(*setup_connection); + } + + ZooKeeperRequestFromLogReader request_reader(input_request_log, global_context); + + delay_watch.restart(); + while (auto request_from_log = request_reader.getNextRequest()) + { + if (setup_nodes_collector) + { + setup_nodes_collector->processRequest(*request_from_log); + } + else + { + request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); + push_request(std::move(*request_from_log)); + } + + if (delay > 0 && delay_watch.elapsedSeconds() > delay) + { + if (setup_nodes_collector) + setup_nodes_collector->generateSnapshot(); + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + std::cerr << std::endl; + } + delay_watch.restart(); + } + } +} + +void Runner::runBenchmarkWithGenerator() +{ + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); + queue.emplace(concurrency); createConnections(); std::cerr << "Preparing to run\n"; + benchmark_context.startup(*connections[0]); generator->startup(*connections[0]); std::cerr << "Prepared\n"; @@ -341,7 +1149,7 @@ void Runner::runBenchmark() for (size_t i = 0; i < concurrency; ++i) { auto thread_connections = connections; - pool->scheduleOrThrowOnError([this, connections_ = std::move(thread_connections)]() mutable { thread(connections_); }); + pool->scheduleOrThrowOnError([this, my_connections = std::move(thread_connections)]() mutable { thread(my_connections); }); } } catch (...) @@ -458,8 +1266,232 @@ std::vector> Runner::refreshConnections Runner::~Runner() { - queue->clearAndFinish(); + if (queue) + queue->clearAndFinish(); shutdown = true; - pool->wait(); - generator->cleanup(*connections[0]); + + if (pool) + pool->wait(); + + try + { + auto connection = getConnection(connection_infos[0], 0); + benchmark_context.cleanup(*connection); + } + catch (...) 
+ { + DB::tryLogCurrentException("While trying to clean nodes"); + } +} + +namespace +{ + +void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) +{ + namespace fs = std::filesystem; + + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + Strings children; + auto list_callback = [promise, &children] (const Coordination::ListResponse & response) + { + children = response.names; + promise->set_value(); + }; + zookeeper.list(path, Coordination::ListRequestType::ALL, list_callback, nullptr); + future.get(); + + std::span children_span(children); + while (!children_span.empty()) + { + Coordination::Requests ops; + for (size_t i = 0; i < 1000 && !children.empty(); ++i) + { + removeRecursive(zookeeper, fs::path(path) / children.back()); + ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); + children_span = children_span.subspan(0, children_span.size() - 1); + } + auto multi_promise = std::make_shared>(); + auto multi_future = multi_promise->get_future(); + + auto multi_callback = [multi_promise] (const Coordination::MultiResponse &) + { + multi_promise->set_value(); + }; + zookeeper.multi(ops, multi_callback); + multi_future.get(); + } + auto remove_promise = std::make_shared>(); + auto remove_future = remove_promise->get_future(); + + auto remove_callback = [remove_promise] (const Coordination::RemoveResponse &) + { + remove_promise->set_value(); + }; + + zookeeper.remove(path, -1, remove_callback); + remove_future.get(); +} + +} + +void BenchmarkContext::initializeFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + + std::cerr << "---- Parsing setup ---- " << std::endl; + static const std::string setup_key = "setup"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(setup_key, keys); + for (const auto & key : keys) + { + if (key.starts_with("node")) + { + auto node_key = setup_key + "." + key; + auto parsed_root_node = parseNode(node_key, config); + const auto node = root_nodes.emplace_back(parsed_root_node); + + if (config.has(node_key + ".repeat")) + { + if (!node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); + + auto repeat_count = config.getUInt64(node_key + ".repeat"); + node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + root_nodes.emplace_back(node->clone()); + } + + std::cerr << "Tree to create:" << std::endl; + + node->dumpTree(); + std::cerr << std::endl; + } + } + std::cerr << "---- Done parsing data setup ----\n" << std::endl; +} + +std::shared_ptr BenchmarkContext::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + auto node = std::make_shared(); + node->name = StringGetter::fromConfig(key + ".name", config); + + if (config.has(key + ".data")) + node->data = StringGetter::fromConfig(key + ".data", config); + + Poco::Util::AbstractConfiguration::Keys node_keys; + config.keys(key, node_keys); + + for (const auto & node_key : node_keys) + { + if (!node_key.starts_with("node")) + continue; + + const auto node_key_string = key + "." 
+ node_key; + auto child_node = parseNode(node_key_string, config); + node->children.push_back(child_node); + + if (config.has(node_key_string + ".repeat")) + { + if (!child_node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); + + auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + child_node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + node->children.push_back(child_node); + } + } + + return node; +} + +void BenchmarkContext::Node::dumpTree(int level) const +{ + std::string data_string + = data.has_value() ? fmt::format("{}", data->description()) : "no data"; + + std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; + + std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; + + for (auto it = children.begin(); it != children.end();) + { + const auto & child = *it; + child->dumpTree(level + 1); + std::advance(it, child->repeat_count != 0 ? child->repeat_count : 1); + } +} + +std::shared_ptr BenchmarkContext::Node::clone() const +{ + auto new_node = std::make_shared(); + new_node->name = name; + new_node->data = data; + new_node->repeat_count = repeat_count; + + // don't do deep copy of children because we will do clone only for root nodes + new_node->children = children; + + return new_node; +} + +void BenchmarkContext::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const +{ + auto path = std::filesystem::path(parent_path) / name.getString(); + auto promise = std::make_shared>(); + auto future = promise->get_future(); + auto create_callback = [promise] (const Coordination::CreateResponse & response) + { + if (response.error != Coordination::Error::ZOK) + promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + promise->set_value(); + }; + zookeeper.create(path, data ? 
+    future.get();
+
+    for (const auto & child : children)
+        child->createNode(zookeeper, path, acls);
+}
+
+void BenchmarkContext::startup(Coordination::ZooKeeper & zookeeper)
+{
+    if (root_nodes.empty())
+        return;
+
+    std::cerr << "---- Creating test data ----" << std::endl;
+    for (const auto & node : root_nodes)
+    {
+        auto node_name = node->name.getString();
+        node->name.setString(node_name);
+
+        std::string root_path = std::filesystem::path("/") / node_name;
+        std::cerr << "Cleaning up " << root_path << std::endl;
+        removeRecursive(zookeeper, root_path);
+
+        node->createNode(zookeeper, "/", default_acls);
+    }
+    std::cerr << "---- Created test data ----\n" << std::endl;
+}
+
+void BenchmarkContext::cleanup(Coordination::ZooKeeper & zookeeper)
+{
+    if (root_nodes.empty())
+        return;
+
+    std::cerr << "---- Cleaning up test data ----" << std::endl;
+    for (const auto & node : root_nodes)
+    {
+        auto node_name = node->name.getString();
+        std::string root_path = std::filesystem::path("/") / node_name;
+        std::cerr << "Cleaning up " << root_path << std::endl;
+        removeRecursive(zookeeper, root_path);
+    }
+}
diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h
index 4f4a75e6ecf..c19a4d82898 100644
--- a/utils/keeper-bench/Runner.h
+++ b/utils/keeper-bench/Runner.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "Common/ZooKeeper/ZooKeeperConstants.h"
+#include "Common/ZooKeeper/ZooKeeperArgs.h"
 #include
 #include "Generator.h"
 #include
@@ -12,6 +12,7 @@
 #include
 #include
+#include "Interpreters/Context.h"
 #include "Stats.h"
 #include
@@ -19,12 +20,42 @@
 using Ports = std::vector<UInt16>;
 using Strings = std::vector<std::string>;
 
+struct BenchmarkContext
+{
+public:
+    void initializeFromConfig(const Poco::Util::AbstractConfiguration & config);
+
+    void startup(Coordination::ZooKeeper & zookeeper);
+    void cleanup(Coordination::ZooKeeper & zookeeper);
+
+private:
+    struct Node
+    {
+        StringGetter name;
+        std::optional<StringGetter> data;
+        std::vector<std::shared_ptr<Node>> children;
+        size_t repeat_count = 0;
+
+        std::shared_ptr<Node> clone() const;
+
+        void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const;
+        void dumpTree(int level = 0) const;
+    };
+
+    static std::shared_ptr<Node> parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config);
+
+    std::vector<std::shared_ptr<Node>> root_nodes;
+    Coordination::ACLs default_acls;
+};
+
 class Runner
 {
 public:
     Runner(
         std::optional<size_t> concurrency_,
         const std::string & config_path,
+        const std::string & input_request_log_,
+        const std::string & setup_nodes_snapshot_path_,
         const Strings & hosts_strings_,
         std::optional<double> max_time_,
         std::optional<double> delay_,
@@ -44,8 +75,31 @@ public:
     ~Runner();
 
 private:
+    struct ConnectionInfo
+    {
+        std::string host;
+
+        bool secure = false;
+        int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS;
+        int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS;
+        int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS;
+        bool use_compression = false;
+
+        size_t sessions = 1;
+    };
+
     void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config);
+
+    void runBenchmarkWithGenerator();
+    void runBenchmarkFromLog();
+
+    void createConnections();
+    std::vector<std::shared_ptr<Coordination::ZooKeeper>> refreshConnections();
+    std::shared_ptr<Coordination::ZooKeeper> getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx);
+
+    std::string input_request_log;
+    std::string setup_nodes_snapshot_path;
 
     size_t concurrency = 1;
 
     std::optional pool;
@@ -54,7 +108,8 @@ private:
     double max_time = 0;
     double delay = 1;
     bool continue_on_error = false;
-    std::atomic<size_t> max_iterations = 0;
+    size_t max_iterations = 0;
+    std::atomic<size_t> requests_executed = 0;
 
     std::atomic<bool> shutdown = false;
 
@@ -71,25 +126,14 @@ private:
     using Queue = ConcurrentBoundedQueue;
     std::optional<Queue> queue;
 
-    struct ConnectionInfo
-    {
-        std::string host;
-
-        bool secure = false;
-        int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS;
-        int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS;
-        int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS;
-        bool use_compression = false;
-
-        size_t sessions = 1;
-    };
-
     std::mutex connection_mutex;
+    ConnectionInfo default_connection_info;
     std::vector<ConnectionInfo> connection_infos;
    std::vector<std::shared_ptr<Coordination::ZooKeeper>> connections;
     std::unordered_map connections_to_info_map;
 
-    void createConnections();
-    std::shared_ptr<Coordination::ZooKeeper> getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx);
-    std::vector<std::shared_ptr<Coordination::ZooKeeper>> refreshConnections();
+    DB::SharedContextHolder shared_context;
+    DB::ContextMutablePtr global_context;
+
+    BenchmarkContext benchmark_context;
 };
diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp
index 0753d66850f..0b963abf406 100644
--- a/utils/keeper-bench/main.cpp
+++ b/utils/keeper-bench/main.cpp
@@ -1,8 +1,6 @@
 #include
 #include
 #include "Runner.h"
-#include "Stats.h"
-#include "Generator.h"
 #include "Common/Exception.h"
 #include
 #include
@@ -27,6 +25,10 @@ int main(int argc, char *argv[])
 
     bool print_stacktrace = true;
 
+    //Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
+    //Poco::Logger::root().setChannel(channel);
+    //Poco::Logger::root().setLevel("trace");
+
     try
     {
         using boost::program_options::value;
@@ -34,12 +36,14 @@ int main(int argc, char *argv[])
         boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
 
         desc.add_options()
            ("help", "produce help message")
-            ("config", value<std::string>()->default_value(""), "yaml/xml file containing configuration")
-            ("concurrency,c", value<size_t>(), "number of parallel queries")
-            ("report-delay,d", value<double>(), "delay between intermediate reports in seconds (set 0 to disable reports)")
-            ("iterations,i", value<size_t>(), "amount of queries to be executed")
-            ("time-limit,t", value<double>(), "stop launch of queries after specified time limit")
-            ("hosts,h", value<Strings>()->multitoken()->default_value(Strings{}, ""), "")
+            ("config", value<std::string>()->default_value(""), "yaml/xml file containing configuration")
+            ("input-request-log", value<std::string>()->default_value(""), "log of requests that will be replayed")
+            ("setup-nodes-snapshot-path", value<std::string>()->default_value(""), "directory containing snapshots with starting state")
+            ("concurrency,c", value<size_t>(), "number of parallel queries")
+            ("report-delay,d", value<double>(), "delay between intermediate reports in seconds (set 0 to disable reports)")
+            ("iterations,i", value<size_t>(), "amount of queries to be executed")
+            ("time-limit,t", value<double>(), "stop launch of queries after specified time limit")
+            ("hosts,h", value<Strings>()->multitoken()->default_value(Strings{}, ""), "")
             ("continue_on_errors", "continue testing even if a query fails")
         ;
@@ -56,6 +60,8 @@ int main(int argc, char *argv[])
         Runner runner(valueToOptional(options["concurrency"]),
                       options["config"].as<std::string>(),
+                      options["input-request-log"].as<std::string>(),
+                      options["setup-nodes-snapshot-path"].as<std::string>(),
                       options["hosts"].as<Strings>(),
                       valueToOptional(options["time-limit"]),
                       valueToOptional(options["report-delay"]),
@@ -66,9 +72,9 @@ int main(int argc, char *argv[])
     {
         runner.runBenchmark();
     }
-    catch (const DB::Exception & e)
+    catch (...)
     {
-        std::cout << "Got exception while trying to run benchmark: " << e.message() << std::endl;
+        std::cout << "Got exception while trying to run benchmark: " << DB::getCurrentExceptionMessage(true) << std::endl;
     }
 
     return 0;
diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp
index bb184301d03..d876cb8a6d0 100644
--- a/utils/keeper-data-dumper/main.cpp
+++ b/utils/keeper-data-dumper/main.cpp
@@ -1,3 +1,5 @@
+// NOLINTBEGIN(clang-analyzer-optin.core.EnumCastOutOfRange)
+
 #include
 #include
 #include
@@ -96,3 +98,5 @@ int main(int argc, char *argv[])
 
     return 0;
 }
+
+// NOLINTEND(clang-analyzer-optin.core.EnumCastOutOfRange)
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 1f47a999162..f7d84cce4b1 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v24.5.1.1763-stable	2024-06-01
 v24.4.1.2088-stable	2024-05-01
 v24.3.3.102-lts	2024-05-01
 v24.3.2.23-lts	2024-04-03
diff --git a/utils/prepare-time-trace/prepare-time-trace.sh b/utils/prepare-time-trace/prepare-time-trace.sh
index 812928e8bd8..f5ae6772139 100755
--- a/utils/prepare-time-trace/prepare-time-trace.sh
+++ b/utils/prepare-time-trace/prepare-time-trace.sh
@@ -82,3 +82,46 @@ ORDER BY (date, file, pull_request_number, commit_sha, check_name);
 ///
 
 find "$INPUT_DIR" -type f -executable -or -name '*.o' -or -name '*.a' | grep -v cargo | xargs wc -c | grep -v 'total' > "${OUTPUT_DIR}/binary_sizes.txt"
+
+# Additionally, collect information about the symbols inside translation units
+true< '{}.symbols'
+    "
+
+    find "$INPUT_DIR" -type f -name '*.o.symbols' | xargs cat > "${OUTPUT_DIR}/binary_symbols.txt"
+fi
diff --git a/utils/security-generator/generate_security.py b/utils/security-generator/generate_security.py
index ccf9a82067e..21c6b72e476 100755
--- a/utils/security-generator/generate_security.py
+++ b/utils/security-generator/generate_security.py
@@ -10,21 +10,22 @@
 HEADER = """
-# Security Policy
+# ClickHouse Security Vulnerability Response Policy
 
-## Security Announcements
-Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/).
+## Security Change Log and Support
 
-## Scope and Supported Versions
+Details regarding security fixes are publicly reported in our [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). A summary of known security vulnerabilities is shown at the bottom of this page.
 
-The following versions of ClickHouse server are currently being supported with security updates:
+Vulnerability notifications pre-release or during embargo periods are available to open source users and support customers registered for vulnerability alerts. Refer to our [Embargo Policy](#embargo-policy) below.
+
+The following versions of ClickHouse server are currently supported with security updates:
 """
 
 FOOTER = """## Reporting a Vulnerability
 
 We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
 
-To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
+To report a potential vulnerability in ClickHouse please send the details about it through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
 
 ### When Should I Report a Vulnerability?
 
@@ -45,6 +46,24 @@ As the security issue moves from triage, to identified fix, to release planning
 
 ## Public Disclosure Timing
 
 A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days.
+
+## Embargo Policy
+
+Open source users and support customers may subscribe to receive alerts during the embargo period by visiting [https://trust.clickhouse.com/?product=clickhouseoss](https://trust.clickhouse.com/?product=clickhouseoss), requesting access and subscribing for alerts. Subscribers agree not to make these notifications public, issue communications, share this information with others, or issue public patches before the disclosure date. Accidental disclosures must be reported immediately to trust@clickhouse.com. Failure to follow this policy or repeated leaks may result in removal from the subscriber list.
+
+Participation criteria:
+1. Be a current open source user or support customer with a valid corporate email domain (no @gmail.com, @azure.com, etc.).
+1. Sign up to the ClickHouse OSS Trust Center at [https://trust.clickhouse.com](https://trust.clickhouse.com).
+1. Accept the ClickHouse Security Vulnerability Response Policy as outlined above.
+1. Subscribe to ClickHouse OSS Trust Center alerts.
+
+Removal criteria:
+1. Members may be removed for failure to follow this policy or repeated leaks.
+1. Members may be removed for bounced messages (mail delivery failure).
+1. Members may unsubscribe at any time.
+
+Notification process:
+ClickHouse will post notifications within our OSS Trust Center and notify subscribers. Subscribers must log in to the Trust Center to download the notification. The notification will include the timeframe for public disclosure.
""" @@ -79,7 +98,7 @@ def generate_supported_versions() -> str: lts.append(version) to_append = f"| {version} | ✔️ |" if to_append: - if len(regular) == max_regular or len(lts) == max_lts: + if len(regular) == max_regular and len(lts) == max_lts: supported_year = year table.append(to_append) continue diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index be8cf81320c..cad7164b775 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,4 +1,6 @@ clickhouse_add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp) -target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(clickhouse-zookeeper-cli PRIVATE + clickhouse_common_zookeeper_no_log + dbms) diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index 182cb65f194..85e4d18c19f 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -1,2 +1,6 @@ clickhouse_add_executable (zookeeper-dump-tree main.cpp ${SRCS}) -target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_io boost::program_options) +target_link_libraries(zookeeper-dump-tree PRIVATE + clickhouse_common_zookeeper_no_log + clickhouse_common_io + dbms + boost::program_options) diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index 01965413d29..50aaed76110 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -1,2 +1,5 @@ clickhouse_add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) -target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper_no_log boost::program_options) +target_link_libraries(zookeeper-remove-by-list PRIVATE + clickhouse_common_zookeeper_no_log + dbms + boost::program_options)